feat: automatic PDF compression on upload

- Add PdfCompressorService for lossless compression with qpdf
- Add DocumentPdfCompressorListener for automatic compression on persist/update
- Add app:compress-pdf command for batch compression of existing PDFs

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Matthieu
2026-01-25 19:02:26 +01:00
parent b51671b1d4
commit a5118305d3
3 changed files with 302 additions and 0 deletions

View File

@@ -0,0 +1,175 @@
<?php
declare(strict_types=1);
namespace App\Command;
use App\Repository\DocumentRepository;
use Doctrine\ORM\EntityManagerInterface;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
/**
 * Console command that re-compresses every stored PDF document with qpdf.
 *
 * Documents are expected to keep their content as a base64 string (optionally
 * prefixed with a data URI header) in their "path" field. A document is only
 * rewritten when the qpdf output is strictly smaller than the original.
 */
#[AsCommand(
    name: 'app:compress-pdf',
    description: 'Compress all PDF documents stored in database without quality loss',
)]
class CompressPdfCommand extends Command
{
    public function __construct(
        private readonly DocumentRepository $documentRepository,
        private readonly EntityManagerInterface $em,
    ) {
        parent::__construct();
    }

    protected function configure(): void
    {
        $this
            ->addOption('dry-run', null, InputOption::VALUE_NONE, 'Show what would be compressed without actually doing it')
        ;
    }

    /**
     * Compresses all PDF documents and flushes the changes in one go.
     *
     * With --dry-run, only lists the documents that would be processed.
     *
     * @return int Command::FAILURE when qpdf is missing, Command::SUCCESS otherwise
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $io = new SymfonyStyle($input, $output);
        $dryRun = (bool) $input->getOption('dry-run');

        // Check if qpdf is installed
        exec('which qpdf', $qpdfPath, $returnCode);
        if (0 !== $returnCode) {
            $io->error('qpdf is not installed. Run: sudo apt install qpdf');

            return Command::FAILURE;
        }

        $documents = $this->documentRepository->findBy(['mimeType' => 'application/pdf']);
        if (empty($documents)) {
            $io->info('No PDF documents found.');

            return Command::SUCCESS;
        }

        $io->title('PDF Compression');
        $io->text(sprintf('Found %d PDF documents', count($documents)));

        $totalSaved = 0;
        $compressed = 0;

        foreach ($documents as $document) {
            $base64Data = $document->getPath();

            // Remove data URI prefix if present
            if (str_contains($base64Data, ',')) {
                $base64Data = explode(',', $base64Data, 2)[1];
            }

            $pdfContent = base64_decode($base64Data, true);
            if (false === $pdfContent) {
                $io->warning(sprintf('Failed to decode document: %s', $document->getName()));
                continue;
            }

            $originalSize = strlen($pdfContent);

            if ($dryRun) {
                $io->text(sprintf(
                    ' [DRY-RUN] Would compress: %s (%s)',
                    $document->getName(),
                    $this->formatBytes($originalSize)
                ));
                continue;
            }

            $compressedContent = $this->compressWithQpdf($pdfContent);
            if (null === $compressedContent) {
                $io->warning(sprintf('Failed to compress: %s', $document->getName()));
                continue;
            }

            $compressedSize = strlen($compressedContent);

            // Only update if we actually saved space
            if ($compressedSize >= $originalSize) {
                $io->text(sprintf(
                    ' - %s: Already optimal (%s)',
                    $document->getName(),
                    $this->formatBytes($originalSize)
                ));
                continue;
            }

            $saved = $originalSize - $compressedSize;
            $totalSaved += $saved;
            ++$compressed;

            // Rebuild base64 with data URI prefix
            $document->setPath('data:application/pdf;base64,'.base64_encode($compressedContent));
            $document->setSize($compressedSize);

            $io->text(sprintf(
                ' ✓ %s: %s → %s (-%s, -%.1f%%)',
                $document->getName(),
                $this->formatBytes($originalSize),
                $this->formatBytes($compressedSize),
                $this->formatBytes($saved),
                ($saved / $originalSize) * 100
            ));
        }

        if (!$dryRun && $compressed > 0) {
            // NOTE(review): DocumentPdfCompressorListener is registered on preUpdate for
            // Document, so this flush presumably runs the compressor again on every
            // modified document — confirm this redundancy is acceptable.
            $this->em->flush();
            $io->success(sprintf(
                'Compressed %d/%d PDFs. Total space saved: %s',
                $compressed,
                count($documents),
                $this->formatBytes($totalSaved)
            ));
        } elseif ($dryRun) {
            $io->info('Dry run completed. No changes made.');
        } else {
            $io->info('No PDFs needed compression.');
        }

        return Command::SUCCESS;
    }

    /**
     * Runs qpdf on the given raw PDF bytes through temporary files.
     *
     * Temporary files are always removed, including when an error occurs.
     *
     * @return string|null the qpdf output, or null when a temp file could not be
     *                     created/written/read or qpdf exited with a non-zero code
     */
    private function compressWithQpdf(string $pdfContent): ?string
    {
        $tempInput = tempnam(sys_get_temp_dir(), 'pdf_in_');
        $tempOutput = tempnam(sys_get_temp_dir(), 'pdf_out_');

        try {
            if (false === $tempInput || false === $tempOutput) {
                return null;
            }

            // Catches both a failed write (false) and a short write.
            if (strlen($pdfContent) !== file_put_contents($tempInput, $pdfContent)) {
                return null;
            }

            // Compress with qpdf (lossless)
            $command = sprintf(
                'qpdf --linearize --object-streams=generate %s %s 2>&1',
                escapeshellarg($tempInput),
                escapeshellarg($tempOutput)
            );

            // exec() appends to an existing array, so start from an empty one on every call.
            $cmdOutput = [];
            exec($command, $cmdOutput, $returnCode);
            if (0 !== $returnCode || !file_exists($tempOutput)) {
                return null;
            }

            $result = file_get_contents($tempOutput);

            // An unreadable or empty output must never replace the stored document.
            return (false === $result || '' === $result) ? null : $result;
        } finally {
            // Best-effort cleanup: a leftover temp file must not abort the batch.
            foreach ([$tempInput, $tempOutput] as $tempFile) {
                if (is_string($tempFile) && is_file($tempFile)) {
                    @unlink($tempFile);
                }
            }
        }
    }

    /**
     * Formats a byte count using 1024-based units (B, KB, MB, GB), rounded to 2 decimals.
     */
    private function formatBytes(int $bytes): string
    {
        $units = ['B', 'KB', 'MB', 'GB'];
        $lastUnit = count($units) - 1;

        // Work on a separate local: the division turns the value into a float.
        $value = $bytes;
        $i = 0;
        while ($value >= 1024 && $i < $lastUnit) {
            $value /= 1024;
            ++$i;
        }

        return round($value, 2).' '.$units[$i];
    }
}

View File

@@ -0,0 +1,54 @@
<?php
declare(strict_types=1);
namespace App\EventListener;
use App\Entity\Document;
use App\Service\PdfCompressorService;
use Doctrine\Bundle\DoctrineBundle\Attribute\AsEntityListener;
use Doctrine\ORM\Events;
use Psr\Log\LoggerInterface;
/**
 * Doctrine entity listener that swaps a Document's PDF payload for the
 * compressed version returned by PdfCompressorService, before the entity
 * is inserted or updated.
 */
#[AsEntityListener(event: Events::prePersist, method: 'prePersist', entity: Document::class)]
#[AsEntityListener(event: Events::preUpdate, method: 'preUpdate', entity: Document::class)]
class DocumentPdfCompressorListener
{
    public function __construct(
        private readonly PdfCompressorService $pdfCompressor,
        private readonly ?LoggerInterface $logger = null,
    ) {
    }

    /**
     * Hook invoked for the prePersist event of a Document.
     */
    public function prePersist(Document $document): void
    {
        $this->shrinkPdfPayload($document);
    }

    /**
     * Hook invoked for the preUpdate event of a Document.
     */
    public function preUpdate(Document $document): void
    {
        $this->shrinkPdfPayload($document);
    }

    /**
     * Replaces the document's path and size with the compressed values.
     *
     * Does nothing for non-PDF documents or when the compressor returns null.
     */
    private function shrinkPdfPayload(Document $document): void
    {
        $isPdf = 'application/pdf' === $document->getMimeType();
        if (!$isPdf) {
            return;
        }

        $outcome = $this->pdfCompressor->compressBase64Pdf($document->getPath());
        if (null === $outcome) {
            return;
        }

        $document->setPath($outcome['path']);
        $document->setSize($outcome['size']);

        // The logger is optional; build the context only when one is available.
        if (null !== $this->logger) {
            $context = [
                'document' => $document->getName(),
                'originalSize' => $outcome['originalSize'],
                'compressedSize' => $outcome['size'],
                'saved' => $outcome['saved'],
            ];
            $this->logger->info('PDF compressed', $context);
        }
    }
}

View File

@@ -0,0 +1,73 @@
<?php
declare(strict_types=1);
namespace App\Service;
/**
 * Compresses base64-encoded PDF payloads by piping them through the qpdf CLI.
 */
class PdfCompressorService
{
    /**
     * Compresses a base64-encoded PDF, optionally prefixed with a data URI header.
     *
     * Everything up to the first comma is treated as a data URI prefix and dropped
     * before decoding. The result is always rebuilt with the
     * "data:application/pdf;base64," prefix.
     *
     * @param string $base64Data base64 PDF content, with or without data URI prefix
     *
     * @return array{path: string, size: int, originalSize: int, saved: int}|null
     *         the compressed payload and size statistics (in bytes), or null when
     *         qpdf is unavailable, the input cannot be decoded, compression fails,
     *         or the output is not strictly smaller than the input
     */
    public function compressBase64Pdf(string $base64Data): ?array
    {
        // Check if qpdf is available
        exec('which qpdf', $qpdfPath, $returnCode);
        if (0 !== $returnCode) {
            return null;
        }

        // Remove data URI prefix if present
        if (str_contains($base64Data, ',')) {
            $base64Data = explode(',', $base64Data, 2)[1];
        }

        $pdfContent = base64_decode($base64Data, true);
        // Nothing to compress for undecodable or empty payloads.
        if (false === $pdfContent || '' === $pdfContent) {
            return null;
        }

        $originalSize = strlen($pdfContent);

        $compressedContent = $this->runQpdf($pdfContent);
        if (null === $compressedContent) {
            return null;
        }

        $compressedSize = strlen($compressedContent);

        // Only return compressed version if it's smaller
        if ($compressedSize >= $originalSize) {
            return null;
        }

        return [
            // Rebuild with data URI prefix
            'path' => 'data:application/pdf;base64,'.base64_encode($compressedContent),
            'size' => $compressedSize,
            'originalSize' => $originalSize,
            'saved' => $originalSize - $compressedSize,
        ];
    }

    /**
     * Runs qpdf on raw PDF bytes through temporary files.
     *
     * Temporary files are always removed, including when an error occurs.
     *
     * @return string|null the qpdf output, or null when a temp file could not be
     *                     created/written/read or qpdf exited with a non-zero code
     */
    private function runQpdf(string $pdfContent): ?string
    {
        $tempInput = tempnam(sys_get_temp_dir(), 'pdf_in_');
        $tempOutput = tempnam(sys_get_temp_dir(), 'pdf_out_');

        try {
            if (false === $tempInput || false === $tempOutput) {
                return null;
            }

            // Catches both a failed write (false) and a short write.
            if (strlen($pdfContent) !== file_put_contents($tempInput, $pdfContent)) {
                return null;
            }

            // Compress with qpdf (lossless)
            $command = sprintf(
                'qpdf --linearize --object-streams=generate %s %s 2>&1',
                escapeshellarg($tempInput),
                escapeshellarg($tempOutput)
            );

            $cmdOutput = [];
            exec($command, $cmdOutput, $returnCode);
            if (0 !== $returnCode || !file_exists($tempOutput)) {
                return null;
            }

            $result = file_get_contents($tempOutput);

            // An unreadable or empty output is treated as a failed compression.
            return (false === $result || '' === $result) ? null : $result;
        } finally {
            // Best-effort cleanup: a leftover temp file must not break persistence.
            foreach ([$tempInput, $tempOutput] as $tempFile) {
                if (is_string($tempFile) && is_file($tempFile)) {
                    @unlink($tempFile);
                }
            }
        }
    }
}