提交文件
This commit is contained in:
86
extract_ppt_win32.py
Normal file
86
extract_ppt_win32.py
Normal file
@@ -0,0 +1,86 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
使用pywin32提取旧格式PPT文件内容(仅Windows)
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
import sys
|
||||
|
||||
def extract_ppt_with_win32(filepath):
    """Extract the text of every slide in a PowerPoint file via win32com.

    PowerPoint is started through COM (``PowerPoint.Application``), the file
    is opened without a document window, and the text of every shape that has
    a text frame is collected slide by slide.

    Args:
        filepath: Path of the presentation, passed unchanged to
            ``Presentations.Open``.

    Returns:
        On success, a list with one dict per slide::

            {'slide_number': <1-based index>, 'text': [<str>, ...], 'shapes': []}

        ``'text'`` holds the stripped, non-empty text of each readable shape.
        ``'shapes'`` is created empty and is not filled in by this function.

        On any failure (including ``win32com`` not being importable), a dict
        ``{'error': <message>}`` is returned instead of raising.
    """
    powerpoint = None
    presentation = None
    try:
        # Imported lazily so that a missing pywin32 installation is reported
        # through the normal {'error': ...} return value.
        import win32com.client

        # Start the PowerPoint application.
        powerpoint = win32com.client.Dispatch("PowerPoint.Application")
        powerpoint.Visible = 0  # keep the application window hidden

        # Open the file without creating a document window.
        presentation = powerpoint.Presentations.Open(filepath, WithWindow=False)

        content = []
        for slide_num, slide in enumerate(presentation.Slides, 1):
            slide_content = {
                'slide_number': slide_num,
                'text': [],
                'shapes': [],
            }

            for shape in slide.Shapes:
                try:
                    if shape.HasTextFrame:
                        text = shape.TextFrame.TextRange.Text.strip()
                        if text:
                            slide_content['text'].append(text)
                except Exception:
                    # Best effort: a shape whose text cannot be read is
                    # skipped so it does not abort the whole extraction.
                    # ``Exception`` (not a bare ``except``) lets
                    # KeyboardInterrupt / SystemExit propagate.
                    continue

            content.append(slide_content)

        return content

    except Exception as e:
        return {'error': str(e)}

    finally:
        # Always release the COM objects, even when opening or reading
        # failed; otherwise a PowerPoint process is left running.
        # Cleanup errors are ignored so they cannot mask the result that is
        # already being returned.
        if presentation is not None:
            try:
                presentation.Close()
            except Exception:
                pass
        if powerpoint is not None:
            try:
                powerpoint.Quit()
            except Exception:
                pass
|
||||
|
||||
|
||||
def main():
    """Run the extraction over a fixed list of .ppt files and save the result.

    Each listed file under the reference-documents directory is passed to
    ``extract_ppt_with_win32``. Whatever that call returns (a list of slide
    dicts or an ``{'error': ...}`` dict) is stored under the file name, a
    one-line status is printed per file, and the collected mapping is written
    as UTF-8 JSON to ``ppt_content.json`` in the base directory.
    """
    root = r'd:\医院绩效系统'
    source_dir = os.path.join(root, '参考文档')
    json_path = os.path.join(root, 'ppt_content.json')

    targets = (
        '115.《医院绩效管理》[74页].ppt',
        '116.加强医院绩效管理[93页].ppt',
        '117.临床路径[41页].ppt',
        '118.医院护理绩效管理系统的研究[72页].ppt',
    )

    print("使用win32com提取PPT内容...")
    print("=" * 60)

    extracted = {}
    for name in targets:
        source_path = os.path.join(source_dir, name)
        print(f"处理: {name}")
        outcome = extract_ppt_with_win32(source_path)
        extracted[name] = outcome

        # A successful extraction is a list of slide dicts; a failure is a
        # dict carrying an 'error' key.
        if 'error' not in outcome:
            print(f" 提取了 {len(outcome)} 页内容")
            continue
        print(f" 错误: {outcome['error']}")

    # Persist everything, including per-file error entries.
    with open(json_path, 'w', encoding='utf-8') as handle:
        json.dump(extracted, handle, ensure_ascii=False, indent=2)

    print(f"\n内容已保存到: {json_path}")
|
||||
|
||||
|
||||
# Script entry point: run the extraction only when executed directly,
# not when this module is imported.
if "__main__" == __name__:
    main()
|
||||
Reference in New Issue
Block a user