"""Extract text and tables from the numbered appendix Word documents.

Steps performed when the script runs:

1. Pick every file in ``base_dir`` whose name starts with ``01.`` .. ``13.``.
2. Copy each one into ``temp_dir`` under a short ASCII name, so Word is
   never handed the original (non-ASCII) path.
3. Open each copy through Word COM automation and collect the body text
   and the contents of every table.
4. Remove the temporary copies, dump everything to ``kpi_extracted.json``
   and print a summary.
"""
import json
import os
import shutil
import sys

import win32com.client

sys.stdout.reconfigure(encoding='utf-8')

base_dir = r"D:\医院绩效系统\参考文档"
temp_dir = r"D:\temp_docs"

# File-name prefixes "01." through "13." identifying the key appendix files.
KEY_PREFIXES = tuple(f"{n:02d}." for n in range(1, 14))

# Extensions that are kept on the temporary copy; anything else falls back
# to ".doc", which is what every copy used to be named.
WORD_EXTENSIONS = {".doc", ".docx", ".docm", ".dot", ".dotx", ".dotm", ".rtf"}


def _cell_text(cell):
    """Return the text of a Word table cell without its end-of-cell marker.

    Word terminates every cell's ``Range.Text`` with CR + BEL.  BEL is not
    whitespace, so a plain ``strip()`` leaves the marker in place and makes
    every cell look non-empty.
    """
    return cell.Range.Text.rstrip("\r\x07").strip()


def _extract_table(table):
    """Return the non-empty rows of a Word table as lists of cell strings."""
    try:
        rows = [[_cell_text(cell) for cell in row.Cells] for row in table.Rows]
    except Exception:
        # Word raises a COM error when rows are accessed individually on a
        # table with vertically merged cells.  Fall back to walking the
        # cells in document order and grouping them by their row index.
        grouped = {}
        for cell in table.Range.Cells:
            grouped.setdefault(cell.RowIndex, []).append(_cell_text(cell))
        rows = [grouped[index] for index in sorted(grouped)]
    return [row for row in rows if any(row)]


def _read_document(word, filepath):
    """Open *filepath* in Word and return ``{'text': ..., 'tables': ...}``.

    The document is always closed again, even when extraction fails, so a
    bad table cannot leave the file open (and locked) inside Word.
    """
    doc = word.Documents.Open(filepath, ReadOnly=True)
    try:
        tables = []
        for table in doc.Tables:
            table_data = _extract_table(table)
            if table_data:
                tables.append(table_data)
        return {'text': doc.Content.Text, 'tables': tables}
    finally:
        doc.Close(SaveChanges=False)


# Create temp directory
os.makedirs(temp_dir, exist_ok=True)

# Get actual file names using os.listdir
files = os.listdir(base_dir)

# Find files starting with 01-13
key_files = [f for f in sorted(files) if f.startswith(KEY_PREFIXES)]

print(f"Key appendix files: {len(key_files)}")

file_mapping = {}
results = {}

try:
    # Copy files to temp directory with simple names
    for i, filename in enumerate(key_files, start=1):
        src = os.path.join(base_dir, filename)
        ext = os.path.splitext(filename)[1].lower()
        if ext not in WORD_EXTENSIONS:
            ext = ".doc"
        simple_name = f"doc{i:02d}{ext}"
        dst = os.path.join(temp_dir, simple_name)
        shutil.copy2(src, dst)
        file_mapping[simple_name] = filename
        print(f"Copied: {filename} -> {simple_name}")

    # Read files using win32com from temp directory
    print("\n\n=== READING FILES ===\n")

    word = win32com.client.Dispatch("Word.Application")
    try:
        word.Visible = False
        word.DisplayAlerts = False

        for simple_name, original_name in file_mapping.items():
            filepath = os.path.join(temp_dir, simple_name)
            print(f"\nReading: {original_name}")
            try:
                data = _read_document(word, filepath)
            except Exception as e:
                # One unreadable document must not abort the whole batch.
                results[original_name] = {'error': str(e)}
                print(f" Error: {e}")
            else:
                results[original_name] = data
                print(f" Success: {len(data['text'])} chars, {len(data['tables'])} tables")
    finally:
        # Always shut Word down; otherwise a hidden WINWORD.EXE stays alive.
        word.Quit()
finally:
    # Cleanup temp files: remove only the copies made above, never other
    # files that may already live in temp_dir.  Cleanup is best-effort so
    # that a locked file cannot prevent the results from being saved.
    for simple_name in file_mapping:
        try:
            os.remove(os.path.join(temp_dir, simple_name))
        except OSError as e:
            print(f"Warning: could not remove temp file {simple_name}: {e}")
    try:
        os.rmdir(temp_dir)
    except OSError:
        # Directory still holds files this script did not create; keep it.
        pass

# Save results
with open(r"D:\医院绩效系统\kpi_extracted.json", "w", encoding="utf-8") as f:
    json.dump(results, f, ensure_ascii=False, indent=2)

print("\n\nSaved to kpi_extracted.json")

# Print content summary
print("\n\n=== FILE CONTENTS ===\n")
for filename, data in results.items():
    print(f"\n{'='*80}")
    print(f"FILE: {filename}")
    print(f"{'='*80}")
    if isinstance(data, dict):
        if 'error' in data:
            print(f"Error: {data['error']}")
        elif 'text' in data:
            text = data['text']
            print(f"Text content ({len(text)} chars):")
            # Only the first 4000 characters are shown in the console summary.
            print(text[:4000])
            if data.get('tables'):
                print(f"\nTables ({len(data['tables'])}):")
                for i, table in enumerate(data['tables']):
                    print(f"\n Table {i+1}:")
                    for row in table:
                        print(f" {' | '.join(str(c)[:80] for c in row)}")
    else:
        print(data)