当前位置：首页 > news >正文

用 Python 实现将长 Markdown 文档从二级标题开始拆分

news 2025/7/6 11:25:09

以下是一个简单的 Python 脚本：它按二级标题（以“## ”开头的行）拆分 Markdown 文档，并把每个部分保存为一个以标题命名的独立 .md 文件，存放在输出文件夹（脚本中固定为 output_md_files）中。

import os

# Characters that are path separators or are rejected in file names on common
# platforms, plus ASCII control characters. Each one is replaced with "_" so a
# heading such as "Input/Output" cannot escape the output folder or make
# open() fail.
_UNSAFE_FILENAME_CHARS = str.maketrans(
    {char: '_' for char in '<>:"/\\|?*' + ''.join(map(chr, range(32)))}
)

_HEADING_PREFIX = '## '


def _fence_marker(line):
    """Return the code-fence run that starts *line* (e.g. '```'), or ''."""
    body = line.lstrip(' ')
    if len(line) - len(body) > 3:
        # Four or more spaces of indent make an indented code block, not a fence.
        return ''
    for fence_char in '`~':
        run = len(body) - len(body.lstrip(fence_char))
        if run >= 3:
            # A backtick fence's info string may not contain backticks;
            # otherwise the line is inline code such as ```x```.
            if fence_char == '`' and '`' in body[run:]:
                return ''
            return body[:run]
    return ''


def _section_filename(heading_line, used_names):
    """Build a safe '.md' file name for a heading that is unique within this run."""
    # Slice off the exact prefix; lstrip('# ') would also eat leading '#' or
    # spaces that belong to the title itself.
    title = heading_line[len(_HEADING_PREFIX):].strip()
    stem = title.replace(' ', '_').translate(_UNSAFE_FILENAME_CHARS)
    if not stem:
        stem = 'untitled'

    # Repeated headings get a numeric suffix instead of overwriting the earlier
    # section. Names are compared case-insensitively because some file systems
    # treat "Intro.md" and "intro.md" as the same file.
    candidate = stem
    counter = 1
    while candidate.casefold() in used_names:
        counter += 1
        candidate = f'{stem}_{counter}'
    used_names.add(candidate.casefold())
    return candidate + '.md'


def split_markdown_by_headers(input_path, output_folder):
    """Split a Markdown file into one file per level-2 ("## ") heading.

    Each section is written to ``output_folder`` as ``<heading>.md``, with
    spaces in the heading replaced by underscores. Any text that precedes the
    first level-2 heading is kept at the top of the first section's file. If
    the document has no level-2 heading, no file is written.

    Lines that start with "## " inside a fenced code block are treated as
    code, not as headings.

    Args:
        input_path: Path of the UTF-8 encoded Markdown file to read.
        output_folder: Directory that receives the section files; it is
            created if it does not exist.

    Raises:
        OSError: If the input file cannot be opened or an output file cannot
            be written.
    """
    os.makedirs(output_folder, exist_ok=True)

    current_file = None
    current_content = []
    used_names = set()
    open_fence = ''  # fence run of the code block we are inside, '' if none

    with open(input_path, 'r', encoding='utf-8') as file:
        for line in file:
            marker = _fence_marker(line)
            if open_fence:
                # A fence closes only on a bare run of the same character that
                # is at least as long as the opening run.
                if (
                    marker
                    and marker[0] == open_fence[0]
                    and len(marker) >= len(open_fence)
                    and line.strip() == marker
                ):
                    open_fence = ''
            elif marker:
                open_fence = marker
            elif line.startswith(_HEADING_PREFIX):
                # Flush the section collected so far before starting a new one.
                if current_file:
                    save_current_file(current_file, current_content, output_folder)
                    current_content = []
                current_file = _section_filename(line, used_names)
            current_content.append(line)

    # Flush the final section.
    if current_file:
        save_current_file(current_file, current_content, output_folder)


def save_current_file(filename, content, folder):
    """Write the lines in *content* to ``folder/filename`` as UTF-8 text.

    Args:
        filename: Name of the file to create inside *folder*.
        content: Iterable of text lines, each already carrying its newline.
        folder: Existing directory to write into.
    """
    output_path = os.path.join(folder, filename)
    with open(output_path, 'w', encoding='utf-8') as file:
        file.writelines(content)


# The input path is supplied by the user at run time.
input_path = input("请输入Markdown文件的路径: ")

# Section files always go into this fixed folder.
output_path = 'output_md_files'

# Split the chosen document at its level-2 headings.
split_markdown_by_headers(input_path=input_path, output_folder=output_path)