git_cleaner.py: Rekursiv git gc auf allen Repositories ausführen
Dieses Skript, git_cleaner.py, wurde entwickelt, um ein angegebenes Verzeichnis (oder standardmäßig das aktuelle Verzeichnis) rekursiv nach Git-Repositorys zu durchsuchen und git gc auf jedem auszuführen. Es erfasst die Größe und Dateianzahl des .git-Verzeichnisses vor und nach der Ausführung von git gc, sodass Sie sehen können, wie viel Platz zurückgewonnen wurde und wie viele Dateien entfernt wurden. Das Skript bietet außerdem eine endgültige Zusammenfassung aller verarbeiteten Repositorys.
git_cleaner.py
#!/usr/bin/env python3
import os
import subprocess
import argparse
import sys
def get_dir_stats(path):
    """Return ``(total_size_bytes, total_file_count)`` for the tree under *path*.

    The directory is walked recursively with ``os.walk``. Symbolic links are
    skipped entirely, so they add neither to the size nor to the file count.

    Files that disappear or become inaccessible between being listed and
    being measured (e.g. because another process is rewriting the directory
    at the same time) are skipped instead of aborting the whole scan.
    """
    total_size = 0
    file_count = 0
    for dirpath, _, filenames in os.walk(path):
        for name in filenames:
            file_path = os.path.join(dirpath, name)
            if os.path.islink(file_path):
                continue
            try:
                total_size += os.path.getsize(file_path)
            except OSError:
                # The entry vanished or cannot be stat'ed; leave it out of
                # both totals rather than crash the caller.
                continue
            file_count += 1
    return total_size, file_count
def format_size(bytes_size):
    """Format a byte count as text with two decimals and a unit suffix.

    The value is divided by 1024 until its magnitude drops below 1024 or the
    units B, KB, MB and GB are exhausted; anything larger is reported in TB.
    Negative values keep their sign.
    """
    units = ("B", "KB", "MB", "GB")
    value = bytes_size
    index = 0
    while index < len(units):
        if abs(value) < 1024:
            return f"{value:.2f} {units[index]}"
        value = value / 1024
        index += 1
    # Ran past GB: whatever is left is expressed in terabytes.
    return f"{value:.2f} TB"
def main():
    """Run ``git gc`` in every repository below a directory and report savings.

    Command line:
        target_dir  Optional directory to search (default: ``.``).
        Arguments that argparse does not recognise are appended verbatim to
        the ``git gc`` command line.

    Every directory containing a ``.git`` subdirectory is treated as a
    repository. The size and file count of its ``.git`` directory are
    measured before and after ``git gc``; the difference is printed and the
    positive differences are accumulated for the final summary.

    Exits with status 1 if the target is not a directory or if the ``git``
    executable cannot be started.
    """
    parser = argparse.ArgumentParser(
        description="Recursively run 'git gc', reporting space and files saved."
    )
    parser.add_argument(
        "target_dir",
        nargs="?",
        default=".",
        help="Directory to search (default: '.')"
    )
    # parse_known_args lets unrecognised options pass through to 'git gc'.
    args, unknown_args = parser.parse_known_args()
    target_path = os.path.abspath(args.target_dir)

    if not os.path.isdir(target_path):
        print(f"Error: {target_path} is not a directory.")
        sys.exit(1)

    total_saved_bytes = 0
    total_files_removed = 0
    repos_processed = 0

    print(f"--- Scanning: {target_path} ---")
    for root, dirs, _ in os.walk(target_path):
        if ".git" not in dirs:
            continue

        # Efficiency: prune .git in place so os.walk does not descend into
        # the repository metadata we are about to process.
        dirs.remove(".git")

        repos_processed += 1
        git_dir = os.path.join(root, ".git")

        # 1. Capture stats before
        size_before, files_before = get_dir_stats(git_dir)
        print(f"\n[{repos_processed}] Optimizing: {root}")

        # 2. Run git gc
        # Note: We use -C to run git in the specific directory
        cmd = ["git", "-C", root, "gc"] + unknown_args
        try:
            subprocess.run(cmd, check=True, capture_output=True)
        except FileNotFoundError:
            # Without a git executable no repository can be processed, so
            # stop with a clear message instead of an uncaught traceback.
            print("Error: 'git' executable not found.")
            sys.exit(1)
        except subprocess.CalledProcessError as e:
            # git's stderr is not guaranteed to be valid UTF-8; replace
            # undecodable bytes rather than fail while reporting a failure.
            message = e.stderr.decode(errors="replace").strip()
            print(f" [!] Error: {message}")
            continue

        # 3. Capture stats after
        size_after, files_after = get_dir_stats(git_dir)
        saved_size = size_before - size_after
        removed_files = files_before - files_after

        # A repository that grew must not reduce the overall totals.
        total_saved_bytes += max(0, saved_size)
        total_files_removed += max(0, removed_files)

        print(f" Files: {files_before} -> {files_after} ({removed_files} removed)")
        print(f" Size: {format_size(size_before)} -> {format_size(size_after)} ({format_size(saved_size)} saved)")

    # Final Summary Table
    print("\n" + "="*45)
    print(f"{'FINAL SUMMARY':^45}")
    print("-" * 45)
    print(f" Repositories Processed : {repos_processed}")
    print(f" Total Space Reclaimed : {format_size(total_saved_bytes)}")
    print(f" Total Files Removed : {total_files_removed}")
    print("="*45)
if __name__ == "__main__":
    main()

Beispielausgabe
git_cleaner_output.txt
[...]
[433] Optimizing: /home/uli/dev/FlareDNS
Files: 73 -> 33 (40 removed)
Size: 70.14 KB -> 47.03 KB (23.10 KB saved)
=============================================
FINAL SUMMARY
---------------------------------------------
Repositories Processed : 433
Total Space Reclaimed : 238.14 MB
Total Files Removed : 21612
=============================================

If this post helped you, please consider buying me a coffee or donating via PayPal to support research & publishing of new posts on TechOverflow.