#!/usr/bin/env python3
"""
Extract the text content of legacy-format .ppt files through pywin32.

Windows only: the extraction drives a PowerPoint instance over COM, so both
pywin32 and PowerPoint must be available on the machine.
"""

import os
import json
import sys


def _shape_text(shape):
    """Return the stripped text of *shape*, or '' if it has none or is unreadable."""
    try:
        if shape.HasTextFrame:
            return shape.TextFrame.TextRange.Text.strip()
    except Exception:
        # Best-effort: a shape whose text properties raise when accessed is
        # skipped instead of aborting the whole presentation.  Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        return ''
    return ''


def _extract_slide(slide_num, slide):
    """Build the output record for one slide."""
    texts = []
    for shape in slide.Shapes:
        text = _shape_text(shape)
        if text:
            texts.append(text)
    return {
        'slide_number': slide_num,
        'text': texts,
        # Never populated by this script; kept so the JSON layout is unchanged.
        'shapes': [],
    }


def extract_ppt_with_win32(filepath):
    """Extract per-slide text from a PowerPoint file using win32com.

    Args:
        filepath: Path of the presentation to open.  It is converted to an
            absolute path before being handed to PowerPoint.

    Returns:
        On success, a list with one dict per slide (1-based order):
        ``{'slide_number': int, 'text': [str, ...], 'shapes': []}``.
        On any failure (pywin32 not importable, PowerPoint cannot be
        started, the file cannot be opened, ...), a dict
        ``{'error': <message>}``.  The function does not raise for ordinary
        errors.
    """
    try:
        import win32com.client

        # Start (or attach to) the PowerPoint application.
        powerpoint = win32com.client.Dispatch("PowerPoint.Application")
        try:
            try:
                powerpoint.Visible = 0  # do not show the application window
            except Exception:
                # NOTE(review): some PowerPoint versions reportedly refuse to
                # hide the application window; WithWindow=False below already
                # keeps the document windowless, so this is not fatal.
                pass

            # The path is made absolute so it does not depend on how the
            # PowerPoint process resolves relative names; the file is opened
            # read-only because it is only read from.
            presentation = powerpoint.Presentations.Open(
                os.path.abspath(filepath),
                ReadOnly=True,
                WithWindow=False,
            )
            try:
                return [
                    _extract_slide(slide_num, slide)
                    for slide_num, slide in enumerate(presentation.Slides, 1)
                ]
            finally:
                # Always release the document, even if extraction failed.
                presentation.Close()
        finally:
            # Always shut PowerPoint down so no hidden process is left behind.
            powerpoint.Quit()

    except Exception as e:
        return {'error': str(e)}


def main():
    """Extract the configured .ppt files and save the results as JSON."""
    base_dir = r'd:\医院绩效系统'
    ref_dir = os.path.join(base_dir, '参考文档')
    output_file = os.path.join(base_dir, 'ppt_content.json')

    ppt_files = [
        '115.《医院绩效管理》[74页].ppt',
        '116.加强医院绩效管理[93页].ppt',
        '117.临床路径[41页].ppt',
        '118.医院护理绩效管理系统的研究[72页].ppt'
    ]

    results = {}

    print("使用win32com提取PPT内容...")
    print("=" * 60)

    for filename in ppt_files:
        filepath = os.path.join(ref_dir, filename)
        print(f"处理: {filename}")

        content = extract_ppt_with_win32(filepath)
        results[filename] = content

        # A dict result is the error form; a list result is the slide data.
        if isinstance(content, dict):
            print(f"  错误: {content['error']}")
        else:
            print(f"  提取了 {len(content)} 页内容")

    # Save the results.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    print(f"\n内容已保存到: {output_file}")


if __name__ == '__main__':
    main()