Hugo Python 脚本:检查损坏的图片链接

这篇文章提供了一个 Python 脚本，用于检查 Hugo 静态网站生成器使用的 Markdown 文件中损坏的图片链接。该脚本扫描 content 目录中的所有 Markdown 文件，提取图片链接，并检查每个链接对应的文件是否存在于 static 目录中。

check_images_hugo.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import re
import urllib.parse

# Directory that contains this script; the content and static directories
# are looked up next to it.
_SCRIPT_DIR = os.path.dirname(__file__)
CONTENT_DIR = os.path.join(_SCRIPT_DIR, 'content')
STATIC_DIR = os.path.join(_SCRIPT_DIR, 'static')

# Regular expression for the Markdown image syntax ![alt](path); group 1
# captures everything between the parentheses.
IMAGE_REGEX = re.compile(r'!\[[^\]]*\]\(([^)]+)\)')

def find_md_files(root):
    """Yield the path of every file ending in ``.md`` beneath *root*.

    The directory tree is walked recursively with ``os.walk``. The suffix
    comparison is case-sensitive, so ``.MD`` files are not yielded. Each
    yielded path is *root* joined with the file's relative location.
    """
    for folder, _subdirs, names in os.walk(root):
        yield from (
            os.path.join(folder, name)
            for name in names
            if name.endswith('.md')
        )

# Matches image targets that cannot live in the local static directory:
# anything starting with a URL scheme (http:, https:, data:, ...) or a
# protocol-relative URL (//host/path).
_REMOTE_URL_REGEX = re.compile(r'^(?:[A-Za-z][A-Za-z0-9+.-]*:|//)')

# Matches a quoted Markdown link title following the destination, e.g.
# ![alt](/images/a.png "Some title"); group 1 is the destination alone.
_TITLE_REGEX = re.compile(r'''^(.*?)\s+(?:"[^"]*"|'[^']*')$''')


def check_images():
    """Report Markdown image links whose target file is missing.

    Reads every Markdown file under CONTENT_DIR, extracts the image
    destinations matched by IMAGE_REGEX and checks that each one exists as a
    regular file under STATIC_DIR. The findings are printed to stdout.

    Remote images (URLs with a scheme such as ``https:`` or ``data:``, and
    protocol-relative ``//host/...`` URLs) are skipped, because they cannot
    be resolved against the static directory. A quoted link title after the
    destination is ignored.

    Returns:
        A list of ``(markdown_file, image_path)`` tuples, one per missing
        image, in the order they were found. Empty when nothing is missing.
    """
    missing = []
    for md_file in find_md_files(CONTENT_DIR):
        with open(md_file, 'r', encoding='utf-8') as f:
            content = f.read()
        for match in IMAGE_REGEX.finditer(content):
            img_path = match.group(1).strip()
            # Drop an optional link title: ![alt](path "title")
            title_match = _TITLE_REGEX.match(img_path)
            if title_match:
                img_path = title_match.group(1)
            # External images are not served from the static directory.
            if _REMOTE_URL_REGEX.match(img_path):
                continue
            # Remove the query string / fragment if present.
            img_path_clean = img_path.split('?', 1)[0].split('#', 1)[0]
            # Links may be percent-encoded; file names on disk are not.
            img_path_decoded = urllib.parse.unquote(img_path_clean)
            # Strip leading slashes so the site-absolute path is joined onto
            # STATIC_DIR instead of replacing it.
            static_img_path = os.path.join(STATIC_DIR, img_path_decoded.lstrip('/'))
            if not os.path.isfile(static_img_path):
                missing.append((md_file, img_path))
    if missing:
        print('Missing images:')
        for md_file, img_path in missing:
            print(f'{md_file}: {img_path}')
    else:
        print('All images found.')
    return missing

# Run the check only when this file is executed as a script, not on import.
if __name__ == '__main__':
    check_images()

示例输出

run_check_images.sh
python check_images_hugo.py
check_images_output.txt
Missing images:
/home/user/TechOverflow/content/post/2023/6/esp32-max31855-thermocouple-lcd-minimal-example-using-platformio.md: /images/2023/06/MAX31855-LCD-example-1024x617.jpg
/home/user/TechOverflow/content/post/2023/6/st7735r-lvgl-live-update-chart-example-for-platformio.md: /images/2023/06/LVGL-Live-update-chart-1024x693.jpg
/home/user/TechOverflow/content/post/2023/2/how-to-pair-bluetooth-gamepad-game-controller-from-amazon.md: /images/2023/02/Gamepad-3-1024x591.jpg
/home/user/TechOverflow/content/post/2023/2/how-to-pair-bluetooth-gamepad-game-controller-from-amazon.md: /images/2023/02/Gamepad-Pairing-1024x614.jpg
/home/user/TechOverflow/content/post/2023/8/how-to-matplotlib-plt-savefig-to-a-io-bytesio-buffer.md: /images/2023/08/plot-1024x768.png
/home/user/TechOverflow/content/post/2023/1/how-to-access-inventree-backup-settings.md: /images/2023/01/Inventree-Backup-Task-1024x706.png
/home/user/TechOverflow/content/post/2023/5/adafruit-st7735r-tft-display-minimal-text-example-for-platformio.md: /images/2023/05/ST7735R-counter-example-2-872x1024.jpg
/home/user/TechOverflow/content/post/2022/8/how-to-save-image-from-jupyter-notebook-using-right-click.md: /images/2022/08/JupyterMenu-194x300.png
/home/user/TechOverflow/content/post/2022/1/pre-and-post-script-for-veeam.md: /images/2022/01/veeam-1-1024x620.png
/home/user/TechOverflow/content/post/2018/12/how-to-use-the-old-classic-editor-in-wordpress-5-0.md: /images/2018/12/Screenshot_20181216_004907-300x49.png
[...]

Check out similar posts by category: Hugo, Python