提交文件

This commit is contained in:
2026-02-28 15:16:15 +08:00
parent 1a4e50e0a4
commit 44f250f58e
159 changed files with 61268 additions and 0 deletions

86
extract_ppt_win32.py Normal file
View File

@@ -0,0 +1,86 @@
#!/usr/bin/env python3
"""
Extract the contents of legacy-format PPT files using pywin32 (Windows only).
"""
import os
import json
import sys
def extract_ppt_with_win32(filepath):
    """Extract the text of every slide from a PowerPoint file via COM automation.

    Drives a PowerPoint instance through ``win32com`` (pywin32), so it needs
    Windows with PowerPoint installed.

    Args:
        filepath: Path to the presentation. It is converted to an absolute
            path before being handed to PowerPoint.

    Returns:
        On success, a list with one dict per slide, in slide order::

            {'slide_number': <1-based int>, 'text': [<str>, ...], 'shapes': []}

        ``text`` holds the stripped, non-empty text of each shape that has a
        text frame. ``shapes`` is never populated; the key is kept only so
        the output layout stays the same for existing consumers.

        On failure (missing file, pywin32 not importable, COM error, ...),
        a dict ``{'error': <message>}``. Exceptions derived from
        ``Exception`` are never propagated to the caller.
    """
    powerpoint = None
    presentation = None
    try:
        # PowerPoint runs as its own process and may not share this script's
        # working directory, so always give it an absolute path.
        filepath = os.path.abspath(filepath)
        if not os.path.isfile(filepath):
            # Fail early instead of launching PowerPoint for a missing file.
            return {'error': f'文件不存在: {filepath}'}

        import win32com.client

        # Start the PowerPoint application.
        powerpoint = win32com.client.Dispatch("PowerPoint.Application")
        powerpoint.Visible = 0  # do not show the window
        # Open the file without creating a document window.
        presentation = powerpoint.Presentations.Open(filepath, WithWindow=False)

        content = []
        for slide_num, slide in enumerate(presentation.Slides, 1):
            texts = []
            for shape in slide.Shapes:
                try:
                    if shape.HasTextFrame:
                        text = shape.TextFrame.TextRange.Text.strip()
                        if text:
                            texts.append(text)
                except Exception:
                    # Best effort: one unreadable shape must not abort the
                    # slide. Unlike a bare ``except``, this lets
                    # KeyboardInterrupt / SystemExit through.
                    continue
            content.append({
                'slide_number': slide_num,
                'text': texts,
                'shapes': [],
            })
        return content
    except Exception as e:
        return {'error': str(e)}
    finally:
        # Always release the COM objects, including on the error paths, so a
        # failed extraction does not leave a PowerPoint process behind.
        # Cleanup failures are ignored: the result is already decided.
        if presentation is not None:
            try:
                presentation.Close()
            except Exception:
                pass
        if powerpoint is not None:
            try:
                powerpoint.Quit()
            except Exception:
                pass
def main():
    """Run the extraction over the hard-coded reference decks and save the result.

    Each listed deck in the reference-document subfolder of the hard-coded
    base directory is passed to ``extract_ppt_with_win32``. A status line is
    printed per file, and the mapping of file name to extraction result is
    written as UTF-8 JSON to ``ppt_content.json`` in the base directory.
    """
    project_root = r'd:\医院绩效系统'
    source_dir = os.path.join(project_root, '参考文档')
    deck_names = (
        '115.《医院绩效管理》[74页].ppt',
        '116.加强医院绩效管理[93页].ppt',
        '117.临床路径[41页].ppt',
        '118.医院护理绩效管理系统的研究[72页].ppt',
    )

    print("使用win32com提取PPT内容...")
    print("=" * 60)

    extracted = {}
    for filename in deck_names:
        print(f"处理: {filename}")
        content = extract_ppt_with_win32(os.path.join(source_dir, filename))
        extracted[filename] = content
        # A failed extraction comes back as a dict carrying an 'error' key;
        # a successful one is a list of per-slide dicts.
        if 'error' not in content:
            print(f" 提取了 {len(content)} 页内容")
        else:
            print(f" 错误: {content['error']}")

    # Persist everything, keeping non-ASCII text readable in the JSON.
    output_file = os.path.join(project_root, 'ppt_content.json')
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(extracted, f, ensure_ascii=False, indent=2)
    print(f"\n内容已保存到: {output_file}")
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()