From a5118305d3b3764054fc2790465fce6d700c0d19 Mon Sep 17 00:00:00 2001
From: Matthieu
Date: Sun, 25 Jan 2026 19:02:26 +0100
Subject: [PATCH] feat : automatic PDF compression on upload

- Add PdfCompressorService for lossless compression with qpdf
- Add DocumentPdfCompressorListener for automatic compression on persist/update
- Add app:compress-pdf command for batch compression of existing PDFs

Co-Authored-By: Claude Opus 4.5
---
 src/Command/CompressPdfCommand.php                 | 175 ++++++++++++++++++
 .../DocumentPdfCompressorListener.php              |  54 ++++++
 src/Service/PdfCompressorService.php               |  73 ++++++++
 3 files changed, 302 insertions(+)
 create mode 100644 src/Command/CompressPdfCommand.php
 create mode 100644 src/EventListener/DocumentPdfCompressorListener.php
 create mode 100644 src/Service/PdfCompressorService.php

diff --git a/src/Command/CompressPdfCommand.php b/src/Command/CompressPdfCommand.php
new file mode 100644
index 0000000..ee3232c
--- /dev/null
+++ b/src/Command/CompressPdfCommand.php
@@ -0,0 +1,175 @@
+addOption('dry-run', null, InputOption::VALUE_NONE, 'Show what would be compressed without actually doing it')
+        ;
+    }
+
+    /**
+     * Re-compresses every stored PDF document with qpdf and saves the ones that got smaller.
+     *
+     * Documents are selected by the "application/pdf" MIME type and their content is read from
+     * getPath() as base64, with an optional data URI prefix in front. A document is only
+     * rewritten when the qpdf output is strictly smaller; all changes are flushed once at the
+     * end. With --dry-run the candidates are listed and nothing is modified.
+     *
+     * @return int Command::FAILURE when qpdf cannot be found, Command::SUCCESS otherwise
+     */
+    protected function execute(InputInterface $input, OutputInterface $output): int
+    {
+        $io = new SymfonyStyle($input, $output);
+        $dryRun = $input->getOption('dry-run');
+
+        // Check if qpdf is installed
+        exec('which qpdf', $qpdfPath, $returnCode);
+        if (0 !== $returnCode) {
+            $io->error('qpdf is not installed. Run: sudo apt install qpdf');
+
+            return Command::FAILURE;
+        }
+
+        $documents = $this->documentRepository->findBy(['mimeType' => 'application/pdf']);
+
+        if (empty($documents)) {
+            $io->info('No PDF documents found.');
+
+            return Command::SUCCESS;
+        }
+
+        $io->title('PDF Compression');
+        $io->text(sprintf('Found %d PDF documents', count($documents)));
+
+        $totalSaved = 0;
+        $compressed = 0;
+
+        foreach ($documents as $document) {
+            $base64Data = $document->getPath();
+
+            // Remove data URI prefix if present
+            if (str_contains($base64Data, ',')) {
+                $base64Data = explode(',', $base64Data, 2)[1];
+            }
+
+            $pdfContent = base64_decode($base64Data, true);
+            if (false === $pdfContent) {
+                $io->warning(sprintf('Failed to decode document: %s', $document->getName()));
+
+                continue;
+            }
+
+            $originalSize = strlen($pdfContent);
+
+            if ($dryRun) {
+                $io->text(sprintf(
+                    ' [DRY-RUN] Would compress: %s (%s)',
+                    $document->getName(),
+                    $this->formatBytes($originalSize)
+                ));
+
+                continue;
+            }
+
+            // Compress with qpdf (lossless)
+            $compressedContent = $this->runQpdf($pdfContent);
+            if (null === $compressedContent) {
+                $io->warning(sprintf('Failed to compress: %s', $document->getName()));
+
+                continue;
+            }
+
+            $compressedSize = strlen($compressedContent);
+
+            // Only update if we actually saved space
+            if ($compressedSize < $originalSize) {
+                $saved = $originalSize - $compressedSize;
+                $totalSaved += $saved;
+                ++$compressed;
+
+                // Rebuild base64 with data URI prefix
+                $newBase64 = 'data:application/pdf;base64,'.base64_encode($compressedContent);
+                $document->setPath($newBase64);
+                $document->setSize($compressedSize);
+
+                $io->text(sprintf(
+                    ' ✓ %s: %s → %s (-%s, -%.1f%%)',
+                    $document->getName(),
+                    $this->formatBytes($originalSize),
+                    $this->formatBytes($compressedSize),
+                    $this->formatBytes($saved),
+                    ($saved / $originalSize) * 100
+                ));
+            } else {
+                $io->text(sprintf(
+                    ' - %s: Already optimal (%s)',
+                    $document->getName(),
+                    $this->formatBytes($originalSize)
+                ));
+            }
+        }
+
+        if (!$dryRun && $compressed > 0) {
+            $this->em->flush();
+            $io->success(sprintf(
+                'Compressed %d/%d PDFs. Total space saved: %s',
+                $compressed,
+                count($documents),
+                $this->formatBytes($totalSaved)
+            ));
+        } elseif ($dryRun) {
+            $io->info('Dry run completed. No changes made.');
+        } else {
+            $io->info('No PDFs needed compression.');
+        }
+
+        return Command::SUCCESS;
+    }
+
+    /**
+     * Runs qpdf over the given PDF bytes using two temporary files.
+     *
+     * The temporary files are removed on every path, including failures.
+     *
+     * @param string $pdfContent raw (already base64-decoded) PDF bytes
+     *
+     * @return string|null the PDF as rewritten by qpdf, or null when a temporary file could not
+     *                     be created or written, qpdf exited with a non-zero status, or its
+     *                     output could not be read back
+     */
+    private function runQpdf(string $pdfContent): ?string
+    {
+        $tempInput = tempnam(sys_get_temp_dir(), 'pdf_in_');
+        $tempOutput = tempnam(sys_get_temp_dir(), 'pdf_out_');
+
+        try {
+            if (false === $tempInput || false === $tempOutput) {
+                return null;
+            }
+
+            if (false === file_put_contents($tempInput, $pdfContent)) {
+                return null;
+            }
+
+            $command = sprintf(
+                'qpdf --linearize --object-streams=generate %s %s 2>&1',
+                escapeshellarg($tempInput),
+                escapeshellarg($tempOutput)
+            );
+
+            // A fresh array per call: exec() appends to an array that already has elements.
+            $cmdOutput = [];
+            exec($command, $cmdOutput, $returnCode);
+            if (0 !== $returnCode) {
+                return null;
+            }
+
+            $compressedContent = file_get_contents($tempOutput);
+
+            // tempnam() already created the output file, so its mere existence proves nothing;
+            // an unreadable or empty file means there is no usable result.
+            if (false === $compressedContent || '' === $compressedContent) {
+                return null;
+            }
+
+            return $compressedContent;
+        } finally {
+            // Best-effort cleanup: a file that is already gone must not abort the batch.
+            foreach ([$tempInput, $tempOutput] as $tempFile) {
+                if (false !== $tempFile) {
+                    @unlink($tempFile);
+                }
+            }
+        }
+    }
+
+    /**
+     * Formats a byte count with the largest fitting unit (B, KB, MB or GB, 1024-based),
+     * rounded to two decimals.
+     */
+    private function formatBytes(int $bytes): string
+    {
+        $units = ['B', 'KB', 'MB', 'GB'];
+        $i = 0;
+        while ($bytes >= 1024 && $i < count($units) - 1) {
+            $bytes /= 1024;
+            ++$i;
+        }
+
+        return round($bytes, 2).' '.$units[$i];
+    }
+}
diff --git a/src/EventListener/DocumentPdfCompressorListener.php b/src/EventListener/DocumentPdfCompressorListener.php
new file mode 100644
index 0000000..85e00b9
--- /dev/null
+++ b/src/EventListener/DocumentPdfCompressorListener.php
@@ -0,0 +1,54 @@
+compressIfPdf($document);
+    }
+
+    /**
+     * Update hook: delegates to compressIfPdf().
+     *
+     * NOTE(review): presumably registered as a Doctrine preUpdate entity listener; the
+     * registration is not visible in this part of the patch - confirm.
+     */
+    public function preUpdate(Document $document): void
+    {
+        $this->compressIfPdf($document);
+    }
+
+    /**
+     * Replaces the document's content with a compressed copy when one is available.
+     *
+     * Nothing happens unless the MIME type is exactly "application/pdf" and
+     * compressBase64Pdf() returns a result; on a null result the document is left untouched.
+     * Otherwise path and size are overwritten and, if a logger is set, the saving is logged.
+     */
+    private function compressIfPdf(Document $document): void
+    {
+        if ('application/pdf' !== $document->getMimeType()) {
+            return;
+        }
+
+        $result = $this->pdfCompressor->compressBase64Pdf($document->getPath());
+
+        if (null === $result) {
+            return;
+        }
+
+        $document->setPath($result['path']);
+        $document->setSize($result['size']);
+
+        $this->logger?->info('PDF compressed', [
+            'document' => $document->getName(),
+            'originalSize' => $result['originalSize'],
+            'compressedSize' => $result['size'],
+            'saved' => $result['saved'],
+        ]);
+    }
+}
diff --git a/src/Service/PdfCompressorService.php b/src/Service/PdfCompressorService.php
new file mode 100644
index 0000000..9ebe502
--- /dev/null
+++ b/src/Service/PdfCompressorService.php
@@ -0,0 +1,73 @@
+&1',
+            escapeshellarg($tempInput),
+            escapeshellarg($tempOutput)
+        );
+
+        exec($command, $cmdOutput, $returnCode);
+
+        if (0 !== $returnCode || !file_exists($tempOutput)) {
+            @unlink($tempInput);
+            @unlink($tempOutput);
+
+            return null;
+        }
+
+        $compressedContent = file_get_contents($tempOutput);
+        $compressedSize = strlen($compressedContent);
+
+        @unlink($tempInput);
+        @unlink($tempOutput);
+
+        // Only return compressed version if it's smaller
+        if ($compressedSize >= $originalSize) {
+            return null;
+        }
+
+        // Rebuild with data URI prefix
+        $newBase64 = 'data:application/pdf;base64,'.base64_encode($compressedContent);
+
+        return [
+            'path' => $newBase64,
+            'size' => $compressedSize,
+            'originalSize' => $originalSize,
+            'saved' => $originalSize - $compressedSize,
+        ];
+    }
+}