#!/usr/bin/env python3
"""
|
||
使用pywin32提取旧格式PPT文件内容(仅Windows)
|
||
"""
|
||
import os
|
||
import json
|
||
import sys
|
||
|
||
def extract_ppt_with_win32(filepath):
    """Extract the text of every slide in a PPT file through PowerPoint COM.

    Starts a PowerPoint application via ``win32com``, opens ``filepath``
    without a document window, and collects the stripped, non-empty text of
    every shape that has a text frame.

    Args:
        filepath: Path to the presentation. It is converted to an absolute
            path before being handed to PowerPoint.

    Returns:
        On success, a list with one dict per slide::

            {'slide_number': <1-based index>, 'text': [<str>, ...], 'shapes': []}

        (``'shapes'`` is always left empty by this function.)
        On any failure (pywin32 missing, PowerPoint unavailable, file cannot
        be opened, ...), a dict ``{'error': <message>}``.

    The presentation and the PowerPoint application are always released,
    including when extraction fails part-way through.
    """
    powerpoint = None
    presentation = None
    try:
        # Imported lazily so the module can still be imported on machines
        # without pywin32; the failure is then reported via the error dict.
        import win32com.client

        # Start the PowerPoint application.
        powerpoint = win32com.client.Dispatch("PowerPoint.Application")
        # Do not show the application window.
        # NOTE(review): some PowerPoint versions may reject hiding the
        # window; if so the exception is reported through the error dict.
        powerpoint.Visible = 0

        # Open the file read-only (we only extract) and without a window.
        # The path is made absolute because the COM server's working
        # directory is not necessarily this process's working directory.
        presentation = powerpoint.Presentations.Open(
            os.path.abspath(filepath), ReadOnly=True, WithWindow=False
        )

        content = []
        for slide_num, slide in enumerate(presentation.Slides, 1):
            slide_content = {
                'slide_number': slide_num,
                'text': [],
                'shapes': []
            }

            for shape in slide.Shapes:
                try:
                    if shape.HasTextFrame:
                        text = shape.TextFrame.TextRange.Text.strip()
                        if text:
                            slide_content['text'].append(text)
                except Exception:
                    # Best effort: a shape whose text cannot be read is
                    # skipped so one bad shape does not lose the whole deck.
                    continue

            content.append(slide_content)

        return content

    except Exception as e:
        return {'error': str(e)}

    finally:
        # Always release COM resources so a failure does not leave an
        # orphaned PowerPoint process behind. Cleanup errors are ignored
        # so they cannot mask the real result or the original error.
        if presentation is not None:
            try:
                presentation.Close()
            except Exception:
                pass
        if powerpoint is not None:
            try:
                powerpoint.Quit()
            except Exception:
                pass
|
||
|
||
|
||
def main():
    """Extract text from a fixed list of PPT files and save it as JSON.

    Each name in ``ppt_files`` is joined onto the reference-documents folder
    (``ref_dir``) and passed to ``extract_ppt_with_win32``. The result for
    each file -- a list of slide dicts on success, or an ``{'error': ...}``
    dict on failure -- is stored under the file name, and the whole mapping
    is written as UTF-8 JSON to ``ppt_content.json`` inside ``base_dir``.
    Progress and per-file outcomes are printed to stdout.
    """
    base_dir = r'd:\医院绩效系统'
    ref_dir = os.path.join(base_dir, '参考文档')
    output_file = os.path.join(base_dir, 'ppt_content.json')

    ppt_files = [
        '115.《医院绩效管理》[74页].ppt',
        '116.加强医院绩效管理[93页].ppt',
        '117.临床路径[41页].ppt',
        '118.医院护理绩效管理系统的研究[72页].ppt'
    ]

    results = {}

    print("使用win32com提取PPT内容...")
    print("=" * 60)

    for filename in ppt_files:
        filepath = os.path.join(ref_dir, filename)
        print(f"处理: {filename}")
        content = extract_ppt_with_win32(filepath)
        results[filename] = content

        # A failed extraction is a dict carrying an 'error' key; a successful
        # one is a list of slide dicts. Test the type explicitly instead of
        # relying on `'error' in <list>` merely happening to be False.
        if isinstance(content, dict) and 'error' in content:
            print(f" 错误: {content['error']}")
        else:
            print(f" 提取了 {len(content)} 页内容")

    # Save the results; ensure_ascii=False keeps the Chinese text readable.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    print(f"\n内容已保存到: {output_file}")
|
||
|
||
|
||
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|