提交文件

2026-02-28 15:16:15 +08:00
parent 1a4e50e0a4
commit 44f250f58e
159 changed files with 61268 additions and 0 deletions
--- a/read_key_docs.py
+++ b/read_key_docs.py
@@ -0,0 +1,111 @@
+import os
+import sys
+import docx
+import pdfplumber
+import json
+
+# Switch stdout to UTF-8 before anything is printed; the paths and search
+# patterns used below contain Chinese characters.
+# NOTE(review): presumably needed because the default Windows console
+# encoding cannot represent them -- confirm.
+sys.stdout.reconfigure(encoding='utf-8')
+
+# Directory holding the reference documents (hard-coded Windows path).
+base_dir = r"D:\医院绩效系统\参考文档"
+
+# Get all files
+# os.listdir returns the names of the entries directly inside base_dir
+# (no recursion); find_files() searches this list.
+all_files = os.listdir(base_dir)
+print(f"Total files found: {len(all_files)}")
+
+# Find files by pattern matching
+def find_files(patterns, files=None):
+    """Return the file names that contain at least one of *patterns*.
+
+    Matching is plain, case-sensitive substring containment
+    (``pattern in name``), not glob or regex matching.
+
+    Args:
+        patterns: Iterable of substrings to look for.
+        files: Optional iterable of file names to search.  Defaults to the
+            module-level ``all_files`` listing, so existing
+            ``find_files(patterns)`` calls keep working unchanged.
+
+    Returns:
+        A list of the matching names in the order they were given; a name
+        is not repeated when several patterns match it.
+    """
+    if files is None:
+        files = all_files
+    # Materialise once so a one-shot iterator can be tested against every name.
+    patterns = list(patterns)
+    return [name for name in files if any(p in name for p in patterns)]
+
+# Substrings that mark a file as one of the key assessment documents.
+# find_files() matches by substring containment, so "附表十" on its own also
+# matches 附表十一 through 附表十三; the longer entries are redundant but harmless.
+key_patterns = [
+    # Numbered appendices
+    "附表一", "附表二", "附表三", "附表四", "附表五", "附表六",
+    "附表七", "附表八", "附表九", "附表十", "附表十一", "附表十二", "附表十三",
+    # Topic and department keywords
+    "一票否决", "职能科室公共", "护理部", "院感", "医保", "药学",
+    "手术临床", "非手术", "医疗技术", "医疗辅助", "行政科室",
+    "职工绩效", "KPI",
+]
+
+key_files = find_files(key_patterns)
+print(f"\nKey assessment files found: {len(key_files)}")
+# List the matches alphabetically, one per line.
+for name in sorted(key_files):
+    print(f"  - {name}")
+
+def read_docx(filepath):
+    """Extract the text of a .docx file as one newline-joined string.
+
+    All non-blank paragraphs come first (stripped), followed by one line per
+    table row whose cells are joined with " | ".  Rows in which every cell
+    is empty after stripping are skipped.
+
+    Args:
+        filepath: Path of the document to open with ``docx.Document``.
+
+    Returns:
+        The extracted text, or the string ``"Error: <message>"`` if any
+        exception is raised while opening or reading the document.
+    """
+    try:
+        document = docx.Document(filepath)
+        lines = [p.text.strip() for p in document.paragraphs if p.text.strip()]
+        for table in document.tables:
+            for row in table.rows:
+                cells = [cell.text.strip() for cell in row.cells]
+                # Keep the row only if at least one cell has content.
+                if any(cells):
+                    lines.append(" | ".join(cells))
+        return "\n".join(lines)
+    except Exception as exc:
+        # Failures are reported in-band so the caller's loop keeps going.
+        return f"Error: {exc}"
+
+def read_doc(filepath):
+    """Read *filepath* as raw bytes and decode it to text on a best-effort basis.
+
+    The bytes are decoded with the first of UTF-8, GBK, GB2312 and Latin-1
+    that succeeds.  Latin-1 maps every byte value, so it always succeeds and
+    acts as the catch-all.
+
+    NOTE(review): this does not parse the Word binary format; a genuine
+    binary ``.doc`` will most likely come back as largely unreadable text
+    through the Latin-1 fallback -- confirm whether a real parser is needed.
+
+    Args:
+        filepath: Path of the file to read.
+
+    Returns:
+        The decoded text, or the string ``"Error: <message>"`` if the file
+        could not be read.
+    """
+    try:
+        with open(filepath, 'rb') as f:
+            raw = f.read()
+        # Try different encodings
+        for enc in ('utf-8', 'gbk', 'gb2312', 'latin-1'):
+            try:
+                return raw.decode(enc)
+            except UnicodeDecodeError:
+                # Only a decoding failure means "try the next codec"; anything
+                # else (e.g. KeyboardInterrupt) must not be swallowed here.
+                continue
+        # Unreachable while 'latin-1' is in the list; kept as a safety net.
+        return raw.decode('utf-8', errors='ignore')
+    except Exception as e:
+        # Failures are reported in-band so the caller's loop keeps going.
+        return f"Error: {e}"
+
+def read_pdf(filepath):
+    """Extract the text of a PDF as one newline-joined string.
+
+    Each page's ``extract_text()`` result is collected in page order; pages
+    that yield no text (a falsy result) are skipped.
+
+    Args:
+        filepath: Path of the PDF to open with ``pdfplumber.open``.
+
+    Returns:
+        The extracted text, or the string ``"Error: <message>"`` if any
+        exception is raised while opening or reading the file.
+    """
+    try:
+        with pdfplumber.open(filepath) as pdf:
+            extracted = (page.extract_text() for page in pdf.pages)
+            # Joined inside the ``with`` so pages are read while the PDF is open.
+            return "\n".join(chunk for chunk in extracted if chunk)
+    except Exception as exc:
+        # Failures are reported in-band so the caller's loop keeps going.
+        return f"Error: {exc}"
+
+# Read and save key files
+# Maps each processed file name to its extracted text.  The reader functions
+# return an "Error: ..." string instead of raising, so a failed read is
+# stored as that string rather than aborting the run.
+results = {}
+# The 20-file cap is applied to the sorted matches *before* the extension
+# check, so files with unsupported extensions still count toward the limit.
+for filename in sorted(key_files)[:20]:  # Limit to first 20
+    filepath = os.path.join(base_dir, filename)
+    print(f"\nReading: {filename}")
+    # Compare extensions case-insensitively so names such as "X.DOCX" or
+    # "X.PDF" are not silently skipped.
+    name_lower = filename.lower()
+    if name_lower.endswith('.docx'):
+        content = read_docx(filepath)
+    elif name_lower.endswith('.doc'):
+        content = read_doc(filepath)
+    elif name_lower.endswith('.pdf'):
+        content = read_pdf(filepath)
+    else:
+        # Unsupported file type: nothing is recorded for it.
+        continue
+    results[filename] = content
+    print(f"Content length: {len(content)} chars")
+
+# Save results
+# ensure_ascii=False keeps the Chinese text readable in the JSON file
+# instead of writing \uXXXX escapes.
+with open(r"D:\医院绩效系统\key_content.json", "w", encoding="utf-8") as f:
+    json.dump(results, f, ensure_ascii=False, indent=2)
+
+print(f"\n\nSaved {len(results)} files to key_content.json")
+
+# Print content
+for filename, content in results.items():
+    print(f"\n{'='*80}")
+    print(f"FILE: {filename}")
+    print(f"{'='*80}")
+    # Slicing already returns the whole string when it is shorter than the cap.
+    preview = content[:4000]
+    print(preview)