提交文件

2026-02-28 15:16:15 +08:00
parent 1a4e50e0a4
commit 44f250f58e
159 changed files with 61268 additions and 0 deletions
--- a/analyze_docx.py
+++ b/analyze_docx.py
@@ -0,0 +1,87 @@
+import os
+import sys
+import docx
+import json
+
+# Switch stdout to UTF-8 before anything is printed; the script prints file
+# names and document text, presumably non-ASCII given the paths used here.
+# The hasattr guard keeps the script usable when stdout has been replaced by
+# an object that does not offer ``reconfigure``.
+if hasattr(sys.stdout, "reconfigure"):
+    sys.stdout.reconfigure(encoding='utf-8')
+
+# Directory that is scanned for the .docx files to analyze.
+base_dir = r"D:\医院绩效系统\参考文档"
+
+def read_docx(filepath):
+    """Load a .docx file and collect its non-empty text content.
+
+    Returns a dict with two keys: ``'paragraphs'`` holds the stripped text of
+    every paragraph that is not blank, and ``'tables'`` holds one list of rows
+    per table, each row being a list of stripped cell texts. Rows whose cells
+    are all empty are dropped, as are tables left without any row.
+
+    If anything raises while the document is read, the function does not
+    propagate it and returns ``{'error': <message>}`` instead.
+    """
+    try:
+        document = docx.Document(filepath)
+
+        # Keep only paragraphs that still have text after stripping.
+        paragraphs = [
+            stripped
+            for stripped in (p.text.strip() for p in document.paragraphs)
+            if stripped
+        ]
+
+        tables = []
+        for tbl in document.tables:
+            rows = [[cell.text.strip() for cell in row.cells] for row in tbl.rows]
+            # Discard rows in which every cell is empty.
+            rows = [cells for cells in rows if any(cells)]
+            if rows:
+                tables.append(rows)
+
+        return {'paragraphs': paragraphs, 'tables': tables}
+    except Exception as exc:
+        return {'error': str(exc)}
+
+# Collect the Word documents to analyze. The extension check is
+# case-insensitive, and names starting with "~$" are skipped: those are the
+# owner/lock files Word creates next to an open document, not real documents.
+docx_files = [
+    f for f in os.listdir(base_dir)
+    if f.lower().endswith('.docx') and not f.startswith('~$')
+]
+print(f"Found {len(docx_files)} .docx files\n")
+
+# Read every file in name order, keep its extracted content keyed by file
+# name, and print a short per-file summary.
+all_content = {}
+for filename in sorted(docx_files):
+    filepath = os.path.join(base_dir, filename)
+    print(f"Reading: {filename}")
+    content = read_docx(filepath)
+    all_content[filename] = content
+
+    # read_docx reports a failure as {'error': ...}; show the message instead
+    # of a misleading "0 paragraphs / 0 tables" summary.
+    if 'error' in content:
+        print(f"  Error: {content['error']}")
+        continue
+
+    # Print summary
+    print(f"  Paragraphs: {len(content.get('paragraphs', []))}")
+    print(f"  Tables: {len(content.get('tables', []))}")
+    for i, table in enumerate(content.get('tables', []), start=1):
+        # The column count is taken from the first row of the table.
+        print(f"    Table {i}: {len(table)} rows x {len(table[0]) if table else 0} cols")
+
+# Write everything that was collected to a single JSON file.
+# ensure_ascii=False stores non-ASCII characters as-is rather than as \uXXXX
+# escapes.
+output_path = r"D:\医院绩效系统\docx_content.json"
+serialized = json.dumps(all_content, ensure_ascii=False, indent=2)
+with open(output_path, "w", encoding="utf-8") as f:
+    f.write(serialized)
+
+print("\nSaved content to docx_content.json")
+
+# Print the extracted content of every file whose name contains at least one
+# of the keywords below.
+keywords = ['考核', '评分', '职能']
+key_files = [name for name in docx_files if any(kw in name for kw in keywords)]
+print("\n\n=== DETAILED CONTENT FOR KEY ASSESSMENT FILES ===\n")
+
+separator = '=' * 80
+for filename in sorted(key_files):
+    content = all_content.get(filename, {})
+    print(f"\n{separator}")
+    print(f"FILE: {filename}")
+    print(separator)
+
+    # Only the first 20 paragraphs of a file are shown.
+    paragraphs = content.get('paragraphs')
+    if paragraphs:
+        print("\n--- Paragraphs ---")
+        for text in paragraphs[:20]:
+            print(text)
+
+    # Tables are printed in full, one line per row with " | " between cells.
+    tables = content.get('tables')
+    if tables:
+        print("\n--- Tables ---")
+        for number, table in enumerate(tables, start=1):
+            print(f"\nTable {number}:")
+            for row in table:
+                print(" | ".join(map(str, row)))