當前位置：首頁 > 编程语言 > python >内容正文

python

python doc转png踩坑历程分享

發布時間：2025/6/17 python 35 如意码农

生活随笔收集整理的這篇文章主要介紹了 python doc转png踩坑历程分享，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

　　首先python根據文本內容生成doc，使用的是python-docx庫，使用示例如下：

from docx import Document

from docx.shared import Pt, RGBColor

from docx.oxml.ns import qn

from docx.enum.text import WD_PARAGRAPH_ALIGNMENT  #設置對象居中、對齊等

# Build a one-paragraph .docx containing a centred, bold, black title.
document = Document()

# Set the default ('Normal') style font, and set the w:eastAsia font
# attribute on the underlying rFonts element to the same face.
document.styles['Normal'].font.name = u'微軟雅黑'
document.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'微軟雅黑')

# Title paragraph: 16 pt, centred, bold, black, line spacing 1.3.
# (Statements must start at column 0; a stray leading space here raises
# "IndentationError: unexpected indent".)
title = document.add_paragraph()

# Line spacing of the title paragraph.
title.paragraph_format.line_spacing = 1.3

# Centre the paragraph.
title.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

# Add the title text as a run so it can be formatted on its own.
run = title.add_run('測試文檔')

# Font size: 16 pt.
run.font.size = Pt(16)

# Font colour: black.
run.font.color.rgb = RGBColor(0, 0, 0)

# Bold.
run.font.bold = True

# Write the document next to the script.
document.save('./a.docx')

　　下面進入重點python doc怎么轉png，調研并測試過這幾個方法：

使用unoconv和libreoffice，doc轉pdf，pdf轉png【環境centos 7】

# 安裝 libreoffice  可將doc轉pdf

yum install libreoffice-writer

yum install unoconv

# 安裝imagemagick

yum install ImageMagick

1.安裝字體庫

yum -y install fontconfig

2.添加中文字體，建立存儲中文字體的文件夾

mkdir /usr/share/fonts/chinese

3.在windows上打開c盤下的Windows/Fonts目錄，一般選擇宋體和黑體，可以看到2個后綴名ttf和ttc的文件，將中文字體復制到Linux中那個chinese文件夾

4.添加權限

chmod -R 755 /usr/share/fonts/chinese

5.安裝ttmkfdir來搜索目錄中所有的字體信息，并匯總生成fonts.scale文件

yum -y install ttmkfdir

6.接下來用ttmkfdir生成fonts.scale文件

ttmkfdir -e /usr/share/X11/fonts/encodings/encodings.dir

7.修改字體配置文件

vi /etc/fonts/fonts.conf

8.在配置文件中可以看到一個Font list（字體目錄列表），在其中添加中文字體文件夾位置，即加入一行 <dir>/usr/share/fonts/chinese</dir>，保存后生成緩存

fc-cache

查看中文字體是否被添加進去

fc-list

# 執行轉換

$ unoconv -f pdf -o ./11.pdf 11.docx

$ sz 11.pdf 

$ unoconv -f png -o ./11.png 11.pdf  # 只能轉換出1張圖片

# 多頁PDF可改用ImageMagick的convert，會按頁生成多張圖片：image_transparent-0.png、image_transparent-1.png ……

$ convert -density 300 12.pdf -alpha off -background white -quality 100 image_transparent.png

$ sz 11.png

總結：可行，速度不行

spire.doc，pip安裝庫spire-doc，總結如下：

一個收費庫即有水印，小貴，花不了一點兒（官網價格：https://www.e-iceblue.com/Buy/Spire.Doc.html）。
經測試水印位置，上中下都有，嘗試去水印，去除的不徹底邊緣能看到紅色，或者導致原始文檔內容不連續。
嘗試doc直接轉svg，修改svg水印字顏色和字號，結果轉出的png中，英文顯示正常，中文卻顯示成框框，還出現文字重疊的情況。
速度還是挺快的。

aspose words，pip安裝庫aspose-words，總結如下：

與spire.doc一樣收費，價格沒去看，就看這個水印就放棄了，如圖：

使用unoconv和libreoffice，doc轉pdf，再使用fitz庫pdf轉png，總結如下：

可行，此方案僅僅想在第一種方案上進行速度優化。
實際驗證速度還是不行。

創建帶樣式的模版html，讀doc轉html，替換掉模版正文，DrissionPage讀取本地html文件，截屏存儲為png。

最后這種思路雖然有點繞，不過速度是最快的。
無任何收費，DP（DrissionPage）方案是最優解。

　　以下是測試代碼：

import os

import platform

from PIL import Image

# import cairosvg

from spire.doc import *

from spire.doc.common import *

# import xml.etree.ElementTree as ET

from lxml import etree as ET

def remove_red_watermark(image_path, output_path, red_threshold=100):
    """Repaint reddish pixels white and save the result as a PNG.

    The image at ``image_path`` is opened and converted to RGBA. A pixel
    counts as red when its red channel is above ``red_threshold`` while both
    its green and blue channels are below ``red_threshold + 20``. Such pixels
    are set to white; their alpha value is kept. The edited image is written
    to ``output_path`` in PNG format.

    Args:
        image_path: Path of the image to read.
        output_path: Path the cleaned PNG is written to.
        red_threshold: Channel cut-off used by the red test (default 100).
    """
    img = Image.open(image_path).convert("RGBA")
    px = img.load()
    other_channel_limit = red_threshold + 20

    # Visit every pixel, column by column.
    for col in range(img.width):
        for row in range(img.height):
            red, green, blue, alpha = px[col, row]

            # Skip anything that does not look red.
            if red <= red_threshold:
                continue
            if green >= other_channel_limit or blue >= other_channel_limit:
                continue

            # Replace the colour with white but keep the original opacity
            # (rather than making the pixel transparent).
            px[col, row] = (255, 255, 255, alpha)

    img.save(output_path, format="PNG")

# 讀取SVG文件

def read_svg(file_path):
    """Parse an SVG file.

    Args:
        file_path: Path of the SVG file to parse.

    Returns:
        A ``(root, tree)`` tuple: the root element and the parsed tree it
        belongs to.
    """
    svg_tree = ET.parse(file_path)
    return svg_tree.getroot(), svg_tree

# 修改SVG中的<tspan>標簽

def modify_tspan_tags(root, target_fill, new_fill, new_font_size):
    """Recolour and resize every SVG ``<tspan>`` with a given fill.

    Every descendant ``<tspan>`` (SVG namespace) of ``root`` whose ``fill``
    attribute equals ``target_fill`` gets ``fill`` set to ``new_fill`` and
    ``font-size`` set to ``new_font_size``. The tree is modified in place.

    All other attributes are left exactly as they were. In particular, no
    ``transform`` attribute is written, so elements that had none do not
    gain an empty ``transform=""``.

    Args:
        root: Element whose descendants are searched.
        target_fill: ``fill`` value to look for, e.g. ``'#ff0000'``.
        new_fill: Replacement ``fill`` value.
        new_font_size: Replacement ``font-size`` value (attribute string).
    """
    for tspan in root.findall('.//{http://www.w3.org/2000/svg}tspan'):
        current_fill = tspan.get('fill')
        # Elements without a fill attribute never match.
        if current_fill is None or current_fill != target_fill:
            continue
        tspan.set('fill', new_fill)
        tspan.set('font-size', new_font_size)

# 保存修改后的SVG文件

def save_svg(tree, output_path):
    """Write the element tree to ``output_path`` as UTF-8 XML.

    The file is written with an XML declaration and ``standalone="yes"``.

    Args:
        tree: Parsed tree to serialise.
        output_path: Destination file path.
    """
    serialization = {
        "encoding": "utf-8",
        "xml_declaration": True,
        "standalone": "yes",
    }
    tree.write(output_path, **serialization)

# 將SVG文件轉換為PNG格式

# def convert_svg_to_png(svg_path, png_path):

#     try:

#         # cairosvg.svg2png(url=svg_path, write_to=png_path)

#         # 使用cairosvg生成PNG的字節流

#         with open(svg_path, "r", encoding="utf-8") as f:

#             svg_content = f.read()

#         cairosvg.svg2png(bytestring=svg_content.encode('GBK'), write_to=png_path)

#

#     except Exception as e:

#         print(f'svg convert png err:{svg_path}')

# 主函數

def main(input_svg_path):
    """Hide red ``<tspan>`` text in one SVG and save a modified copy.

    The SVG is parsed, every ``<tspan>`` filled with ``#ff0000`` is recoloured
    to white with font size ``0``, and the result is written next to the
    input as ``<stem>modified_input.svg``, where ``<stem>`` is the input path
    without its trailing ``.svg``. The matching PNG path ``<stem>.png`` is
    only computed and printed: the SVG-to-PNG conversion call is disabled.

    Only a trailing ``.svg`` is stripped, so a ``.svg`` occurring elsewhere
    in the path (for example in a directory name) is left alone, and an input
    without the suffix is never overwritten by its own output.

    Args:
        input_svg_path: Path of the SVG file to process.
    """
    root, tree = read_svg(input_svg_path)

    # Recolour the red text to white and collapse its font size.
    modify_tspan_tags(root, '#ff0000', 'white', '0')

    svg_suffix = '.svg'
    if input_svg_path.endswith(svg_suffix):
        stem = input_svg_path[:-len(svg_suffix)]
    else:
        stem = input_svg_path

    # Save the modified SVG beside the original.
    new_svg = f"{stem}modified_input.svg"
    save_svg(tree, new_svg)
    print(f"修改后的SVG文件已保存為：{new_svg}")

    # Path the PNG would be written to.
    output_png_path = f"{stem}.png"

    # NOTE: the conversion step is disabled, so no PNG is produced here.
    # convert_svg_to_png(new_svg, output_png_path)
    print(f"修改后的SVG文件已轉換為PNG格式：{output_png_path}")

def gen_png(file_path):
    """Save a Word document as SVG, then post-process the SVG files near it.

    The document at ``file_path`` is loaded and saved as ``<stem>-.svg``,
    where ``<stem>`` is ``file_path`` without a trailing ``.docx``. After
    that, the directory containing ``file_path`` is walked and ``main`` is
    called on every file whose name ends in ``.svg``.

    Args:
        file_path: Path of the Word document to convert.
    """
    docx_suffix = '.docx'
    # Strip only a trailing ".docx" so directory names are never altered.
    if file_path.endswith(docx_suffix):
        new_p = file_path[:-len(docx_suffix)]
    else:
        new_p = file_path

    document = Document()
    try:
        document.LoadFromFile(file_path)
        document.SaveToFile(f"{new_p}-.svg")
    finally:
        # Close the document even when loading or saving fails.
        document.Close()

    # os.path.dirname follows the running OS's separator rules; a bare
    # file name means the current directory.
    file_dir = os.path.dirname(file_path) or '.'
    print(f'file_dir==={file_dir}')

    for current_dir, _, file_names in os.walk(file_dir):
        for file_name in file_names:
            if file_name.endswith('.svg'):
                # Join with the directory being walked so files found in
                # sub-directories resolve to their real location.
                main(os.path.join(current_dir, file_name))

def gen_png1(file_path):
    """Render each page of a Word document to PNG and repaint red pixels.

    For page index ``i`` the page image stream is written to
    ``<stem>-<i>_output.png``; ``remove_red_watermark`` then reads that file
    and writes the cleaned image to ``<stem>-<i>.png``. ``<stem>`` is
    ``file_path`` without a trailing ``.docx``.

    Args:
        file_path: Path of the Word document to convert.
    """
    docx_suffix = '.docx'
    # Strip only a trailing ".docx" so directory names are never altered.
    if file_path.endswith(docx_suffix):
        new_p = file_path[:-len(docx_suffix)]
    else:
        new_p = file_path

    document = Document()
    try:
        document.LoadFromFile(file_path)

        for i in range(document.GetPageCount()):
            # Render the page to an image stream.
            imageStream = document.SaveImageToStreams(i, ImageType.Bitmap)

            # Dump the raw page image, then write the cleaned copy.
            raw_png = f"{new_p}-{i}_output.png"
            with open(raw_png, 'wb') as imageFile:
                imageFile.write(imageStream.ToArray())

            remove_red_watermark(raw_png, f"{new_p}-{i}.png")
    finally:
        # Close the document even when a page fails to render.
        document.Close()

if __name__ == '__main__':
    # Pick the sample document path for the current operating system.
    if platform.system() == 'Windows':
        f_p = r'C:\Users\user\Desktop\test\11.docx'
    else:
        f_p = '/jjyy/11.docx'

    print(f'f_p==={f_p}')

    # The SVG route is disabled; the per-page image route is used instead.
    # gen_png(f_p)
    gen_png1(f_p)

　　若報以下錯，參考下面文章：

　　sqlite:No module named _sqlite3
　　ImportError: lxml.html.clean module is now a separate project lxml_html_clean

　　DrissionPage.errors.WrongURLError 無效的url，也許要加上"http://"？

總結

以上是生活随笔為你收集整理的python doc转png踩坑历程分享的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇：每年元宵你家都是怎么过的？
下一篇： SQL语句between and边界问题

国产亚洲精品久久久久动-影视先锋中文字幕-av网站在线观看一区-亚洲视频 在线观看-久久亚洲不卡-欧美精品一区在线观看-欧美乱淫视频-欧美熟妇另类久久久久久不卡-粉嫩av一区二区三区四区五区-日韩欧美操

python

python doc转png踩坑历程分享

總結

国产亚洲精品久久久久动-影视先锋中文字幕-av网站在线观看一区-亚洲视频在线观看-久久亚洲不卡-欧美精品一区在线观看-欧美乱淫视频-欧美熟妇另类久久久久久不卡-粉嫩av一区二区三区四区五区-日韩欧美操