Python使用python-docx库复制Word文档样式的实现方法
目录
- 引言
- 环境准备
- 主要功能实现
- 示例代码
- 结论
- 生成文件代码
- 生成文本列表代码
引言
在日常办公中,我们经常需要处理Word文档的格式调整、内容更新等任务。对于那些希望通过编程手段自动完成这些工作的开发者来说,Python及其丰富的第三方库提供了强大的支持。本文将介绍如何使用python-docx库来复制一个Word文档的内容及样式,并展示如何利用此方法进行文档内容的自动化处理。
环境准备
首先,确保你已经安装了python-docx库。如果尚未安装,可以通过以下命令进行安装:
pip install python-docx
主要功能实现
- 复制段落和文本框样式:通过定义 `copy_paragraph_style` 函数,我们可以复制旧段落或文本框的样式到新创建的段落或文本框上。
- 识别分页符:`is_page_break` 函数帮助我们在文档元素之间识别是否含有分页符,这对于保持文档布局的一致性非常重要。
- 克隆段落和表格:通过 `clone_paragraph` 和 `clone_table` 函数,我们可以根据旧文档中的段落或表格创建新的段落或表格,并且保留原有的样式设置。
- 复制单元格边框:为了使新生成的表格看起来与原表格一致,我们实现了 `copy_cell_borders` 函数来复制每个单元格的边框样式。
- 完整文档复制:最后,通过 `clone_document` 函数,我们可以复制整个文档的内容和样式到一个新的Word文档中。
示例代码
以下是简化版的核心代码示例,展示了如何从旧文档中提取内容并创建一个新的文档:
from docx import Document
# Assume the other required imports are already in place.


def clone_document(old_doc_path, new_doc_path, out_text_list):
    """Simplified sketch of copying a Word document into a new file.

    The paragraph and table handling is elided here; only the overall
    control flow is shown.

    Args:
        old_doc_path: Path of the source .docx file.
        new_doc_path: Path the new document is saved to.
        out_text_list: Text list used by the full implementation; this
            sketch does not touch it.

    Returns:
        None. Any exception is caught and reported with ``print``.
    """
    try:
        # Load the source document and start a new, empty one.
        old_doc = Document(old_doc_path)
        new_doc = Document()

        # Walk the top-level children of the body in document order.
        elements = old_doc.element.body
        para_index = 0
        table_index = 0
        index = 0
        while index < len(elements):
            element = elements[index]
            if element.tag.endswith('p'):
                # Handle the paragraph ... (elided in this sketch)
                para_index += 1
            elif element.tag.endswith('tbl'):
                # Handle the table ... (elided in this sketch)
                table_index += 1
            index += 1

        # Save the new document.
        new_doc.save(new_doc_path)
        print(f"文档已成功保存至:{new_doc_path}")
    except Exception as e:
        print(f"复制文档时发生错误:{e}")
结论
通过上述方法,我们可以高效地复制Word文档的内容和样式,这为文档处理自动化提供了一种有效的解决方案。当然,根据实际需求的不同,你可以进一步扩展和完善这个基础框架,比如添加对更多样式的支持、优化性能等。
希望这篇文章能为你提供有价值的参考,帮助你在日常工作中更高效地处理Word文档。
生成文件代码
from docx import Document
from docx.enum.text import WD_BREAK
from docx.oxml import OxmlElement
from docx.oxml.ns import qn

from copy_word_only_text_json import clone_document as gen_to_list

# Qualified (Clark-notation) tag names of the body-level elements we handle.
_W_P = qn('w:p')
_W_TBL = qn('w:tbl')
_W_BR = qn('w:br')
_W_TYPE = qn('w:type')


def copy_paragraph_style(run_from, run_to):
    """Copy character formatting from one run to another.

    Despite its name this operates on runs: it copies bold, italic,
    underline, font size, RGB colour, font name, all-caps, strike and
    shadow from ``run_from`` to ``run_to``.
    """
    run_to.bold = run_from.bold
    run_to.italic = run_from.italic
    run_to.underline = run_from.underline

    src_font = run_from.font
    dst_font = run_to.font
    dst_font.size = src_font.size
    dst_font.color.rgb = src_font.color.rgb
    dst_font.name = src_font.name
    dst_font.all_caps = src_font.all_caps
    dst_font.strike = src_font.strike
    dst_font.shadow = src_font.shadow


def _has_page_break(element):
    """Return True if ``element`` is or contains ``<w:br w:type="page"/>``."""
    # w:br lives inside w:r, so descendants must be searched; looking only at
    # the direct children of a paragraph never finds a break.
    return any(br.get(_W_TYPE) == 'page' for br in element.iter(_W_BR))


def is_page_break(element):
    """Tell whether a body element carries a page break.

    Args:
        element: An lxml element, normally a ``w:p`` or ``w:tbl``.

    Returns:
        True for a paragraph that contains a page break, or for a table
        whose next sibling is a paragraph containing one; False otherwise.
    """
    if element.tag == _W_P:
        return _has_page_break(element)
    if element.tag == _W_TBL:
        # A break "after" a table is stored in the paragraph that follows it.
        next_element = element.getnext()
        return (
            next_element is not None
            and next_element.tag == _W_P
            and _has_page_break(next_element)
        )
    return False


def _clone_runs(old_para, new_para, out_text_list):
    """Recreate each run of ``old_para`` in ``new_para`` with replacement text.

    One entry is consumed from the front of ``out_text_list`` per run, so the
    list must hold the texts in the same order the extraction script
    produced them.
    """
    for old_run in old_para.runs:
        new_run = new_para.add_run(out_text_list.pop(0))
        copy_paragraph_style(old_run, new_run)
        # Re-create explicit page breaks on the run that carried them; this
        # keeps the one-text-per-run accounting unchanged.
        if _has_page_break(old_run._r):
            new_run.add_break(WD_BREAK.PAGE)


def clone_paragraph(old_para, new_doc, out_text_list):
    """Append a copy of ``old_para`` to ``new_doc`` using replacement text.

    Args:
        old_para: Source paragraph.
        new_doc: Document that receives the new paragraph.
        out_text_list: List of replacement texts; one item is popped from the
            front for every run in ``old_para`` (the list is mutated).

    Returns:
        The newly created paragraph.
    """
    new_para = new_doc.add_paragraph()
    if old_para.style:
        new_para.style = old_para.style
    _clone_runs(old_para, new_para, out_text_list)
    new_para.alignment = old_para.alignment
    return new_para


def copy_cell_borders(old_cell, new_cell):
    """Copy the ``w:tcBorders`` definition of ``old_cell`` onto ``new_cell``.

    Only the top, left, bottom, right, insideH and insideV borders are
    copied. Nothing happens when the source cell defines no borders.
    """
    found = old_cell._tc.xpath('.//w:tcBorders')
    if not found:
        return
    old_border = found[0]

    new_border = OxmlElement('w:tcBorders')
    for border_type in ('top', 'left', 'bottom', 'right', 'insideH', 'insideV'):
        old_element = old_border.find(qn(f'w:{border_type}'))
        if old_element is None:
            continue
        new_element = OxmlElement(f'w:{border_type}')
        for attr, value in old_element.attrib.items():
            new_element.set(attr, value)
        new_border.append(new_element)

    new_cell._tc.get_or_add_tcPr().append(new_border)


def clone_table(old_table, new_doc, out_text_list):
    """Append a copy of ``old_table`` to ``new_doc`` using replacement text.

    The new table gets the same number of rows and columns, the same table
    style, per-paragraph alignment, run formatting, cell borders and column
    widths. Text comes from ``out_text_list`` (one item popped per run).

    Returns:
        The newly created table.
    """
    new_table = new_doc.add_table(rows=len(old_table.rows),
                                  cols=len(old_table.columns))
    if old_table.style:
        new_table.style = old_table.style

    for i, old_row in enumerate(old_table.rows):
        for j, old_cell in enumerate(old_row.cells):
            new_cell = new_table.cell(i, j)
            # Remove the paragraphs the new cell already has so that it ends
            # up with exactly the paragraphs of the source cell.
            for paragraph in new_cell.paragraphs:
                new_cell._element.remove(paragraph._element)
            for old_paragraph in old_cell.paragraphs:
                new_paragraph = new_cell.add_paragraph()
                _clone_runs(old_paragraph, new_paragraph, out_text_list)
                new_paragraph.alignment = old_paragraph.alignment
            copy_cell_borders(old_cell, new_cell)

    new_columns = new_table.columns
    for i, col in enumerate(old_table.columns):
        if col.width is not None:
            new_columns[i].width = col.width
    return new_table


def clone_document(old_doc_path, new_doc_path, out_text_list):
    """Rebuild a document's body with replacement text and save it.

    Body-level paragraphs and tables are copied in document order. Every run
    takes its text from the front of ``out_text_list``.

    Args:
        old_doc_path: Path of the source .docx file.
        new_doc_path: Destination path; when falsy nothing is saved.
        out_text_list: Replacement texts, one per run, in traversal order.

    Returns:
        ``out_text_list`` when ``new_doc_path`` is falsy; otherwise None.
        None is also returned when an exception occurred (it is printed,
        not raised).
    """
    try:
        old_doc = Document(old_doc_path)
        new_doc = Document()

        # NOTE: sections, headers and footers are not copied. Doing so would
        # mean iterating old_doc.sections, adding matching sections to
        # new_doc, and cloning the header/footer paragraphs the same way
        # body paragraphs are cloned.

        # Build the proxy lists once; Document.paragraphs / .tables contain
        # exactly the body-level w:p / w:tbl children, in order.
        paragraphs = iter(old_doc.paragraphs)
        tables = iter(old_doc.tables)
        for element in old_doc.element.body:
            if element.tag == _W_P:
                clone_paragraph(next(paragraphs), new_doc, out_text_list)
            elif element.tag == _W_TBL:
                clone_table(next(tables), new_doc, out_text_list)
            # Anything else (e.g. the trailing w:sectPr) is skipped.

        if new_doc_path:
            new_doc.save(new_doc_path)
            print(f"文档已成功保存至:{new_doc_path}")
        else:
            return out_text_list
    except Exception as e:
        print(f"复制文档时发生错误:{e}")


# Usage example
if __name__ == "__main__":
    out = gen_to_list('.docx', '')
    # Continue only when text was actually extracted; otherwise there is
    # nothing to send to the LLM and nothing to fill back in.
    if out:
        print("文档内容:\n", out, """按照顺序的文档内容请根据用户要求更改文档内容,而不改变顺序,且不改变内容个数,最后将内容list 输出为到给定的json中 ```json {"输出":[]} ``` 用户输入:请润色 """)
        print("请求llm")
        print("提取json")
        print("填入模版")
        out = clone_document('.docx', 'only_text.docx', out)
生成文本列表代码
from docx import Document
from docx.enum.text import WD_BREAK
from docx.oxml import OxmlElement
from docx.oxml.ns import qn

# Qualified (Clark-notation) tag names of the body-level elements we handle.
_W_P = qn('w:p')
_W_TBL = qn('w:tbl')
_W_BR = qn('w:br')
_W_TYPE = qn('w:type')


def copy_paragraph_style(run_from, run_to):
    """Copy character formatting from one run to another.

    Despite its name this operates on runs: it copies bold, italic,
    underline, font size, RGB colour, font name, all-caps, strike and
    shadow from ``run_from`` to ``run_to``.
    """
    run_to.bold = run_from.bold
    run_to.italic = run_from.italic
    run_to.underline = run_from.underline

    src_font = run_from.font
    dst_font = run_to.font
    dst_font.size = src_font.size
    dst_font.color.rgb = src_font.color.rgb
    dst_font.name = src_font.name
    dst_font.all_caps = src_font.all_caps
    dst_font.strike = src_font.strike
    dst_font.shadow = src_font.shadow


def _has_page_break(element):
    """Return True if ``element`` is or contains ``<w:br w:type="page"/>``."""
    # w:br lives inside w:r, so descendants must be searched; looking only at
    # the direct children of a paragraph never finds a break.
    return any(br.get(_W_TYPE) == 'page' for br in element.iter(_W_BR))


def is_page_break(element):
    """Tell whether a body element carries a page break.

    Args:
        element: An lxml element, normally a ``w:p`` or ``w:tbl``.

    Returns:
        True for a paragraph that contains a page break, or for a table
        whose next sibling is a paragraph containing one; False otherwise.
    """
    if element.tag == _W_P:
        return _has_page_break(element)
    if element.tag == _W_TBL:
        # A break "after" a table is stored in the paragraph that follows it.
        next_element = element.getnext()
        return (
            next_element is not None
            and next_element.tag == _W_P
            and _has_page_break(next_element)
        )
    return False


def _clone_runs(old_para, new_para, out_text_list):
    """Copy each run of ``old_para`` into ``new_para`` and record its text.

    The text of every run is appended to ``out_text_list`` in traversal
    order; the companion fill-in script consumes the list in that same order.
    """
    for old_run in old_para.runs:
        text = old_run.text
        out_text_list.append(text)
        new_run = new_para.add_run(text)
        copy_paragraph_style(old_run, new_run)
        # Re-create explicit page breaks on the run that carried them; this
        # keeps the one-text-per-run accounting unchanged.
        if _has_page_break(old_run._r):
            new_run.add_break(WD_BREAK.PAGE)


def clone_paragraph(old_para, new_doc, out_text_list):
    """Append a copy of ``old_para`` to ``new_doc`` and collect its run texts.

    Args:
        old_para: Source paragraph.
        new_doc: Document that receives the new paragraph.
        out_text_list: List that receives one entry per run (mutated).

    Returns:
        The newly created paragraph.
    """
    new_para = new_doc.add_paragraph()
    if old_para.style:
        new_para.style = old_para.style
    _clone_runs(old_para, new_para, out_text_list)
    new_para.alignment = old_para.alignment
    return new_para


def copy_cell_borders(old_cell, new_cell):
    """Copy the ``w:tcBorders`` definition of ``old_cell`` onto ``new_cell``.

    Only the top, left, bottom, right, insideH and insideV borders are
    copied. Nothing happens when the source cell defines no borders.
    """
    found = old_cell._tc.xpath('.//w:tcBorders')
    if not found:
        return
    old_border = found[0]

    new_border = OxmlElement('w:tcBorders')
    for border_type in ('top', 'left', 'bottom', 'right', 'insideH', 'insideV'):
        old_element = old_border.find(qn(f'w:{border_type}'))
        if old_element is None:
            continue
        new_element = OxmlElement(f'w:{border_type}')
        for attr, value in old_element.attrib.items():
            new_element.set(attr, value)
        new_border.append(new_element)

    new_cell._tc.get_or_add_tcPr().append(new_border)


def clone_table(old_table, new_doc, out_text_list):
    """Append a copy of ``old_table`` to ``new_doc`` and collect its run texts.

    The new table gets the same number of rows and columns, the same table
    style, per-paragraph alignment, run formatting, cell borders and column
    widths. The text of every run is appended to ``out_text_list``.

    Returns:
        The newly created table.
    """
    new_table = new_doc.add_table(rows=len(old_table.rows),
                                  cols=len(old_table.columns))
    if old_table.style:
        new_table.style = old_table.style

    for i, old_row in enumerate(old_table.rows):
        for j, old_cell in enumerate(old_row.cells):
            new_cell = new_table.cell(i, j)
            # Remove the paragraphs the new cell already has so that it ends
            # up with exactly the paragraphs of the source cell.
            for paragraph in new_cell.paragraphs:
                new_cell._element.remove(paragraph._element)
            for old_paragraph in old_cell.paragraphs:
                new_paragraph = new_cell.add_paragraph()
                _clone_runs(old_paragraph, new_paragraph, out_text_list)
                new_paragraph.alignment = old_paragraph.alignment
            copy_cell_borders(old_cell, new_cell)

    new_columns = new_table.columns
    for i, col in enumerate(old_table.columns):
        if col.width is not None:
            new_columns[i].width = col.width
    return new_table


def clone_document(old_doc_path, new_doc_path):
    """Copy a document's body and collect the text of every run.

    Body-level paragraphs and tables are copied in document order, and the
    text of each run is collected in that same order.

    Args:
        old_doc_path: Path of the source .docx file.
        new_doc_path: Destination path; when falsy nothing is saved and the
            collected text list is returned instead.

    Returns:
        The list of run texts when ``new_doc_path`` is falsy; otherwise
        None. None is also returned when an exception occurred (it is
        printed, not raised).
    """
    out_text_list = []
    try:
        old_doc = Document(old_doc_path)
        new_doc = Document()

        # NOTE: sections, headers and footers are not copied. Doing so would
        # mean iterating old_doc.sections, adding matching sections to
        # new_doc, and cloning the header/footer paragraphs the same way
        # body paragraphs are cloned.

        # Build the proxy lists once; Document.paragraphs / .tables contain
        # exactly the body-level w:p / w:tbl children, in order.
        paragraphs = iter(old_doc.paragraphs)
        tables = iter(old_doc.tables)
        for element in old_doc.element.body:
            if element.tag == _W_P:
                clone_paragraph(next(paragraphs), new_doc, out_text_list)
            elif element.tag == _W_TBL:
                clone_table(next(tables), new_doc, out_text_list)
            # Anything else (e.g. the trailing w:sectPr) is skipped.

        if new_doc_path:
            new_doc.save(new_doc_path)
            print(f"文档已成功保存至:{new_doc_path}")
        else:
            return out_text_list
    except Exception as e:
        print(f"复制文档时发生错误:{e}")


# Usage example
if __name__ == "__main__":
    out = clone_document('南山三防工作专报.docx', '')
    # The remaining steps are placeholders and only make sense when text was
    # actually extracted.
    if out:
        print("文档内容:\n", out, """按照顺序的文档内容请根据用户要求更改文档内容,而不改变顺序,且不改变内容个数,最后将内容list 输出为到给定的json中 ```json {"输出":[]} ``` 用户输入:请润色 """)
        print("请求llm")
        print("提取json")
        print("填入模版")
到此这篇关于Python使用python-docx库复制Word文档样式的实现方法的文章就介绍到这了。更多相关Python python-docx库复制Word样式内容,请搜索编程客栈(www.devze.com)以前的文章或继续浏览下面的相关文章,希望大家以后多多支持编程客栈(www.devze.com)!
精彩评论