Files
hospital_performance/extract_ppt_win32.py
2026-02-28 15:16:15 +08:00

87 lines
2.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Extract the contents of legacy-format PPT files using pywin32 (Windows only).
"""
import os
import json
import sys
def extract_ppt_with_win32(filepath):
    """Extract the text of every slide in a PowerPoint file through COM.

    PowerPoint is driven via ``win32com``; each shape that reports a text
    frame contributes its stripped, non-empty text to the slide's entry.

    Args:
        filepath: Path to the presentation. It is converted to an absolute
            path before being handed to PowerPoint.

    Returns:
        On success, a list with one dict per slide:
        ``{'slide_number': int, 'text': [str, ...], 'shapes': []}``
        (``'shapes'`` is kept for output compatibility and is never filled).
        On any failure, ``{'error': <message>}``. This function does not
        raise for ordinary errors.
    """
    powerpoint = None
    presentation = None
    try:
        # PowerPoint runs as a separate process with its own working
        # directory, so a relative path would not reliably resolve there.
        abs_path = os.path.abspath(filepath)
        if not os.path.isfile(abs_path):
            # Fail early with a message that names the path instead of an
            # opaque COM error.
            return {'error': f'文件不存在: {abs_path}'}

        # Imported lazily so a missing pywin32 is reported as an error
        # result rather than breaking module import.
        import win32com.client

        powerpoint = win32com.client.Dispatch("PowerPoint.Application")
        # NOTE(review): some PowerPoint versions reportedly refuse to hide
        # the application window; if so this raises and is reported below.
        powerpoint.Visible = 0

        # The file is only read, so open it read-only and without a window.
        presentation = powerpoint.Presentations.Open(
            abs_path, ReadOnly=True, WithWindow=False
        )

        content = []
        for slide_num, slide in enumerate(presentation.Slides, 1):
            slide_content = {
                'slide_number': slide_num,
                'text': [],
                'shapes': [],
            }
            for shape in slide.Shapes:
                try:
                    if shape.HasTextFrame:
                        text = shape.TextFrame.TextRange.Text.strip()
                        if text:
                            slide_content['text'].append(text)
                except Exception:
                    # Best effort: a shape whose text cannot be read is
                    # skipped so one bad shape does not lose the whole deck.
                    continue
            content.append(slide_content)
        return content
    except Exception as e:
        return {'error': str(e)}
    finally:
        # Always release the COM objects, even when extraction failed, so no
        # PowerPoint process is left running. Cleanup failures are ignored
        # because the result (or error) has already been determined.
        if presentation is not None:
            try:
                presentation.Close()
            except Exception:
                pass
        if powerpoint is not None:
            try:
                powerpoint.Quit()
            except Exception:
                pass
def main():
    """Extract text from the fixed set of reference decks and save it as JSON.

    Each listed file under the reference-document directory is passed to
    ``extract_ppt_with_win32``; the per-file result (slide list or error
    dict) is stored under the file name, progress is printed, and the
    combined mapping is written to ``ppt_content.json`` in the base directory.
    """
    root = r'd:\医院绩效系统'
    source_dir = os.path.join(root, '参考文档')
    json_path = os.path.join(root, 'ppt_content.json')

    deck_names = (
        '115.《医院绩效管理》[74页].ppt',
        '116.加强医院绩效管理[93页].ppt',
        '117.临床路径[41页].ppt',
        '118.医院护理绩效管理系统的研究[72页].ppt',
    )

    print("使用win32com提取PPT内容...")
    print("=" * 60)

    extracted = {}
    for deck in deck_names:
        print(f"处理: {deck}")
        outcome = extract_ppt_with_win32(os.path.join(source_dir, deck))
        extracted[deck] = outcome

        # A successful extraction is a list of slide dicts; a failure is a
        # dict carrying an 'error' key.
        if 'error' not in outcome:
            print(f" 提取了 {len(outcome)} 页内容")
            continue
        print(f" 错误: {outcome['error']}")

    # Persist everything, keeping non-ASCII text readable in the output.
    with open(json_path, 'w', encoding='utf-8') as out:
        json.dump(extracted, out, ensure_ascii=False, indent=2)
    print(f"\n内容已保存到: {json_path}")
# Run the extraction only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()