feat : automatic PDF compression on upload
- Add PdfCompressorService for lossless compression with qpdf
- Add DocumentPdfCompressorListener for automatic compression on persist/update
- Add app:compress-pdf command for batch compression of existing PDFs

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
175
src/Command/CompressPdfCommand.php
Normal file
175
src/Command/CompressPdfCommand.php
Normal file
@@ -0,0 +1,175 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Command;
|
||||
|
||||
use App\Repository\DocumentRepository;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
use Symfony\Component\Console\Attribute\AsCommand;
|
||||
use Symfony\Component\Console\Command\Command;
|
||||
use Symfony\Component\Console\Input\InputInterface;
|
||||
use Symfony\Component\Console\Input\InputOption;
|
||||
use Symfony\Component\Console\Output\OutputInterface;
|
||||
use Symfony\Component\Console\Style\SymfonyStyle;
|
||||
|
||||
#[AsCommand(
    name: 'app:compress-pdf',
    description: 'Compress all PDF documents stored in database without quality loss',
)]
/**
 * Batch command that re-compresses every stored PDF document with qpdf.
 *
 * Documents are looked up by mime type; their content is read from
 * Document::getPath() as base64 (optionally prefixed by a data URI header),
 * run through qpdf, and written back only when the result is smaller.
 *
 * NOTE(review): findBy() loads every matching Document at once, and each one
 * carries its whole base64 payload - on a large table this may need batching.
 */
class CompressPdfCommand extends Command
{
    /** Header put in front of the re-encoded base64 payload. */
    private const DATA_URI_PREFIX = 'data:application/pdf;base64,';

    public function __construct(
        private readonly DocumentRepository $documentRepository,
        private readonly EntityManagerInterface $em,
    ) {
        parent::__construct();
    }

    /**
     * Declares the --dry-run flag.
     */
    protected function configure(): void
    {
        $this->addOption(
            'dry-run',
            null,
            InputOption::VALUE_NONE,
            'Show what would be compressed without actually doing it',
        );
    }

    /**
     * Compresses all PDF documents and flushes the changed ones in a single flush.
     *
     * @return int Command::FAILURE when qpdf is missing, Command::SUCCESS otherwise
     *             (individual documents that fail are reported as warnings and skipped)
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $io = new SymfonyStyle($input, $output);
        $dryRun = (bool) $input->getOption('dry-run');

        // Check if qpdf is installed
        exec('which qpdf', $qpdfPath, $returnCode);
        if (0 !== $returnCode) {
            $io->error('qpdf is not installed. Run: sudo apt install qpdf');

            return Command::FAILURE;
        }

        $documents = $this->documentRepository->findBy(['mimeType' => 'application/pdf']);

        if (0 === count($documents)) {
            $io->info('No PDF documents found.');

            return Command::SUCCESS;
        }

        $io->title('PDF Compression');
        $io->text(sprintf('Found %d PDF documents', count($documents)));

        $totalSaved = 0;
        $compressed = 0;

        foreach ($documents as $document) {
            $pdfContent = $this->decodePdf($document->getPath());
            if (null === $pdfContent) {
                $io->warning(sprintf('Failed to decode document: %s', $document->getName()));

                continue;
            }

            $originalSize = strlen($pdfContent);

            if ($dryRun) {
                $io->text(sprintf(
                    ' [DRY-RUN] Would compress: %s (%s)',
                    $document->getName(),
                    $this->formatBytes($originalSize),
                ));

                continue;
            }

            $compressedContent = $this->runQpdf($pdfContent);
            if (null === $compressedContent) {
                $io->warning(sprintf('Failed to compress: %s', $document->getName()));

                continue;
            }

            $compressedSize = strlen($compressedContent);

            // Only update if we actually saved space
            if ($compressedSize >= $originalSize) {
                $io->text(sprintf(
                    ' - %s: Already optimal (%s)',
                    $document->getName(),
                    $this->formatBytes($originalSize),
                ));

                continue;
            }

            $saved = $originalSize - $compressedSize;
            $totalSaved += $saved;
            ++$compressed;

            // Rebuild base64 with data URI prefix
            $document->setPath(self::DATA_URI_PREFIX.base64_encode($compressedContent));
            $document->setSize($compressedSize);

            $io->text(sprintf(
                ' ✓ %s: %s → %s (-%s, -%.1f%%)',
                $document->getName(),
                $this->formatBytes($originalSize),
                $this->formatBytes($compressedSize),
                $this->formatBytes($saved),
                ($saved / $originalSize) * 100,
            ));
        }

        if (!$dryRun && $compressed > 0) {
            $this->em->flush();
            $io->success(sprintf(
                'Compressed %d/%d PDFs. Total space saved: %s',
                $compressed,
                count($documents),
                $this->formatBytes($totalSaved),
            ));
        } elseif ($dryRun) {
            $io->info('Dry run completed. No changes made.');
        } else {
            $io->info('No PDFs needed compression.');
        }

        return Command::SUCCESS;
    }

    /**
     * Turns a stored document payload into raw PDF bytes.
     *
     * Everything up to the first comma is treated as a data URI header and dropped.
     *
     * @return string|null raw bytes, or null when the payload is missing or not valid base64
     */
    private function decodePdf(?string $storedPath): ?string
    {
        if (null === $storedPath) {
            return null;
        }

        $base64Data = str_contains($storedPath, ',')
            ? explode(',', $storedPath, 2)[1]
            : $storedPath;

        $decoded = base64_decode($base64Data, true);

        return false === $decoded ? null : $decoded;
    }

    /**
     * Runs qpdf (--linearize --object-streams=generate) over the given PDF bytes.
     *
     * Temp files are always removed, whichever path leaves the method.
     *
     * @return string|null the bytes qpdf produced, or null on any failure
     *                     (temp file creation, write, non-zero qpdf exit, unreadable or empty output)
     */
    private function runQpdf(string $pdfContent): ?string
    {
        $tempInput = tempnam(sys_get_temp_dir(), 'pdf_in_');
        $tempOutput = tempnam(sys_get_temp_dir(), 'pdf_out_');

        try {
            if (false === $tempInput || false === $tempOutput) {
                return null;
            }

            if (false === file_put_contents($tempInput, $pdfContent)) {
                return null;
            }

            $command = sprintf(
                'qpdf --linearize --object-streams=generate %s %s 2>&1',
                escapeshellarg($tempInput),
                escapeshellarg($tempOutput),
            );

            // exec() appends to an existing array, so start from an empty one on every call.
            $cmdOutput = [];
            exec($command, $cmdOutput, $returnCode);

            if (0 !== $returnCode) {
                return null;
            }

            // tempnam() already created the output file, so its existence proves nothing;
            // an unreadable or empty file is what signals that qpdf wrote no usable PDF.
            $result = file_get_contents($tempOutput);

            return (false === $result || '' === $result) ? null : $result;
        } finally {
            foreach ([$tempInput, $tempOutput] as $tempFile) {
                if (is_string($tempFile) && is_file($tempFile)) {
                    unlink($tempFile);
                }
            }
        }
    }

    /**
     * Formats a byte count with a B/KB/MB/GB unit (1024 steps), rounded to 2 decimals.
     */
    private function formatBytes(int $bytes): string
    {
        $units = ['B', 'KB', 'MB', 'GB'];
        $lastUnit = count($units) - 1;
        $value = (float) $bytes;
        $i = 0;

        while ($value >= 1024 && $i < $lastUnit) {
            $value /= 1024;
            ++$i;
        }

        return round($value, 2).' '.$units[$i];
    }
}
|
||||
54
src/EventListener/DocumentPdfCompressorListener.php
Normal file
54
src/EventListener/DocumentPdfCompressorListener.php
Normal file
@@ -0,0 +1,54 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\EventListener;
|
||||
|
||||
use App\Entity\Document;
|
||||
use App\Service\PdfCompressorService;
|
||||
use Doctrine\Bundle\DoctrineBundle\Attribute\AsEntityListener;
|
||||
use Doctrine\ORM\Events;
|
||||
use Psr\Log\LoggerInterface;
|
||||
|
||||
#[AsEntityListener(event: Events::prePersist, method: 'prePersist', entity: Document::class)]
#[AsEntityListener(event: Events::preUpdate, method: 'preUpdate', entity: Document::class)]
/**
 * Entity listener that compresses a Document's PDF payload before it is
 * inserted or updated.
 *
 * The payload is taken from Document::getPath() and handed to
 * PdfCompressorService; the document is modified only when the service
 * returns a result.
 */
class DocumentPdfCompressorListener
{
    public function __construct(
        private readonly PdfCompressorService $pdfCompressor,
        private readonly ?LoggerInterface $logger = null,
    ) {
    }

    /**
     * Registered for Events::prePersist on Document.
     */
    public function prePersist(Document $document): void
    {
        $this->compressIfPdf($document);
    }

    /**
     * Registered for Events::preUpdate on Document.
     *
     * NOTE(review): this runs for every update of a PDF document, whether or
     * not the payload itself changed - confirm that the extra qpdf run is acceptable.
     */
    public function preUpdate(Document $document): void
    {
        $this->compressIfPdf($document);
    }

    /**
     * Replaces the document's path and size with the compressed version when
     * the document is a PDF and the compressor returns a result.
     */
    private function compressIfPdf(Document $document): void
    {
        if ('application/pdf' !== $document->getMimeType()) {
            return;
        }

        // The compressor takes a non-nullable string; skip documents without a payload
        // rather than letting a TypeError abort the flush.
        // NOTE(review): assumes getPath() may return null - confirm against the entity.
        $path = $document->getPath();
        if (null === $path || '' === $path) {
            return;
        }

        $result = $this->pdfCompressor->compressBase64Pdf($path);

        // null means the compressor produced nothing to apply; keep the document untouched.
        if (null === $result) {
            return;
        }

        $document->setPath($result['path']);
        $document->setSize($result['size']);

        $this->logger?->info('PDF compressed', [
            'document' => $document->getName(),
            'originalSize' => $result['originalSize'],
            'compressedSize' => $result['size'],
            'saved' => $result['saved'],
        ]);
    }
}
|
||||
73
src/Service/PdfCompressorService.php
Normal file
73
src/Service/PdfCompressorService.php
Normal file
@@ -0,0 +1,73 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Service;
|
||||
|
||||
/**
 * Compresses base64-encoded PDFs by shelling out to qpdf.
 */
class PdfCompressorService
{
    /** Header put in front of the re-encoded base64 payload. */
    private const DATA_URI_PREFIX = 'data:application/pdf;base64,';

    /**
     * Compresses a base64-encoded PDF, optionally prefixed by a data URI header.
     *
     * Everything up to the first comma of the input is treated as a data URI
     * header and dropped before decoding.
     *
     * @param string $base64Data base64 payload, with or without a "data:...," prefix
     *
     * @return array{path: string, size: int, originalSize: int, saved: int}|null
     *         the compressed payload (always with the data URI prefix) and size figures in bytes,
     *         or null when qpdf is unavailable, the input is not valid base64, qpdf fails,
     *         or the result is not smaller than the original
     */
    public function compressBase64Pdf(string $base64Data): ?array
    {
        // Check if qpdf is available
        exec('which qpdf', $qpdfPath, $returnCode);
        if (0 !== $returnCode) {
            return null;
        }

        // Remove data URI prefix if present
        if (str_contains($base64Data, ',')) {
            $base64Data = explode(',', $base64Data, 2)[1];
        }

        $pdfContent = base64_decode($base64Data, true);
        if (false === $pdfContent || '' === $pdfContent) {
            // Invalid base64, or nothing to compress.
            return null;
        }

        $originalSize = strlen($pdfContent);

        $compressedContent = $this->runQpdf($pdfContent);
        if (null === $compressedContent) {
            return null;
        }

        $compressedSize = strlen($compressedContent);

        // Only return compressed version if it's smaller
        if ($compressedSize >= $originalSize) {
            return null;
        }

        return [
            'path' => self::DATA_URI_PREFIX.base64_encode($compressedContent),
            'size' => $compressedSize,
            'originalSize' => $originalSize,
            'saved' => $originalSize - $compressedSize,
        ];
    }

    /**
     * Runs qpdf (--linearize --object-streams=generate) over the given PDF bytes.
     *
     * Temp files are always removed, whichever path leaves the method.
     *
     * @return string|null the bytes qpdf produced, or null on any failure
     *                     (temp file creation, write, non-zero qpdf exit, unreadable or empty output)
     */
    private function runQpdf(string $pdfContent): ?string
    {
        $tempInput = tempnam(sys_get_temp_dir(), 'pdf_in_');
        $tempOutput = tempnam(sys_get_temp_dir(), 'pdf_out_');

        try {
            if (false === $tempInput || false === $tempOutput) {
                return null;
            }

            if (false === file_put_contents($tempInput, $pdfContent)) {
                return null;
            }

            $command = sprintf(
                'qpdf --linearize --object-streams=generate %s %s 2>&1',
                escapeshellarg($tempInput),
                escapeshellarg($tempOutput),
            );

            $cmdOutput = [];
            exec($command, $cmdOutput, $returnCode);

            if (0 !== $returnCode) {
                return null;
            }

            // tempnam() already created the output file, so its existence proves nothing;
            // an unreadable or empty file is what signals that qpdf wrote no usable PDF.
            $result = file_get_contents($tempOutput);

            return (false === $result || '' === $result) ? null : $result;
        } finally {
            foreach ([$tempInput, $tempOutput] as $tempFile) {
                if (is_string($tempFile) && is_file($tempFile)) {
                    unlink($tempFile);
                }
            }
        }
    }
}
|
||||
Reference in New Issue
Block a user