- Add PdfCompressorService for lossless compression with qpdf - Add DocumentPdfCompressorListener for automatic compression on persist/update - Add app:compress-pdf command for batch compression of existing PDFs Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
176 lines
5.5 KiB
PHP
176 lines
5.5 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
namespace App\Command;
|
|
|
|
use App\Repository\DocumentRepository;
|
|
use Doctrine\ORM\EntityManagerInterface;
|
|
use Symfony\Component\Console\Attribute\AsCommand;
|
|
use Symfony\Component\Console\Command\Command;
|
|
use Symfony\Component\Console\Input\InputInterface;
|
|
use Symfony\Component\Console\Input\InputOption;
|
|
use Symfony\Component\Console\Output\OutputInterface;
|
|
use Symfony\Component\Console\Style\SymfonyStyle;
|
|
|
|
#[AsCommand(
    name: 'app:compress-pdf',
    description: 'Compress all PDF documents stored in database without quality loss',
)]
/**
 * Console command that re-compresses every stored PDF document with qpdf.
 *
 * Documents are looked up by mime type, their base64 payload (optionally
 * wrapped in a data URI) is decoded, run through qpdf and written back only
 * when the result is strictly smaller than the original. With --dry-run the
 * command only lists the documents and their current size.
 */
class CompressPdfCommand extends Command
{
    /** Prefix put in front of the re-encoded payload when a document is updated. */
    private const DATA_URI_PREFIX = 'data:application/pdf;base64,';

    public function __construct(
        private readonly DocumentRepository $documentRepository,
        private readonly EntityManagerInterface $em,
    ) {
        parent::__construct();
    }

    /**
     * Declares the --dry-run flag.
     */
    protected function configure(): void
    {
        $this
            ->addOption('dry-run', null, InputOption::VALUE_NONE, 'Show what would be compressed without actually doing it')
        ;
    }

    /**
     * Compresses all PDF documents and flushes the changes once at the end.
     *
     * Returns Command::FAILURE only when qpdf cannot be found; documents that
     * fail to decode or compress are reported as warnings and skipped.
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $io = new SymfonyStyle($input, $output);
        $dryRun = (bool) $input->getOption('dry-run');

        // exec() exposes the exit status only through its by-reference
        // arguments, so the (unused) output array has to be passed too.
        $whichOutput = [];
        exec('which qpdf', $whichOutput, $whichStatus);
        if (0 !== $whichStatus) {
            $io->error('qpdf is not installed. Run: sudo apt install qpdf');

            return Command::FAILURE;
        }

        $documents = $this->documentRepository->findBy(['mimeType' => 'application/pdf']);
        $documentCount = count($documents);

        if (0 === $documentCount) {
            $io->info('No PDF documents found.');

            return Command::SUCCESS;
        }

        $io->title('PDF Compression');
        $io->text(sprintf('Found %d PDF documents', $documentCount));

        $totalSaved = 0;
        $compressed = 0;

        foreach ($documents as $document) {
            $pdfContent = $this->decodeStoredPdf($document->getPath());
            if (null === $pdfContent) {
                $io->warning(sprintf('Failed to decode document: %s', $document->getName()));

                continue;
            }

            $originalSize = strlen($pdfContent);

            if ($dryRun) {
                $io->text(sprintf(
                    '  [DRY-RUN] Would compress: %s (%s)',
                    $document->getName(),
                    $this->formatBytes($originalSize)
                ));

                continue;
            }

            $compressedContent = $this->compressWithQpdf($pdfContent);
            if (null === $compressedContent) {
                $io->warning(sprintf('Failed to compress: %s', $document->getName()));

                continue;
            }

            $compressedSize = strlen($compressedContent);

            // Only update if we actually saved space.
            if ($compressedSize >= $originalSize) {
                $io->text(sprintf(
                    '  - %s: Already optimal (%s)',
                    $document->getName(),
                    $this->formatBytes($originalSize)
                ));

                continue;
            }

            $saved = $originalSize - $compressedSize;
            $totalSaved += $saved;
            ++$compressed;

            // Rebuild base64 with data URI prefix.
            $document->setPath(self::DATA_URI_PREFIX.base64_encode($compressedContent));
            $document->setSize($compressedSize);

            $io->text(sprintf(
                '  ✓ %s: %s → %s (-%s, -%.1f%%)',
                $document->getName(),
                $this->formatBytes($originalSize),
                $this->formatBytes($compressedSize),
                $this->formatBytes($saved),
                ($saved / $originalSize) * 100
            ));
        }

        if (!$dryRun && $compressed > 0) {
            $this->em->flush();
            $io->success(sprintf(
                'Compressed %d/%d PDFs. Total space saved: %s',
                $compressed,
                $documentCount,
                $this->formatBytes($totalSaved)
            ));
        } elseif ($dryRun) {
            $io->info('Dry run completed. No changes made.');
        } else {
            $io->info('No PDFs needed compression.');
        }

        return Command::SUCCESS;
    }

    /**
     * Extracts the raw PDF bytes from a stored value.
     *
     * The value may be plain base64 or a data URI; everything up to and
     * including the first comma is treated as the data URI prefix.
     *
     * @param mixed $stored value returned by the document's getPath()
     *
     * @return string|null decoded bytes, or null when the value is not a
     *                     string or is not valid base64
     */
    private function decodeStoredPdf(mixed $stored): ?string
    {
        // A non-string (e.g. null) would make str_contains() throw under
        // strict_types and abort the whole batch; skip that document instead.
        if (!is_string($stored)) {
            return null;
        }

        if (str_contains($stored, ',')) {
            $stored = explode(',', $stored, 2)[1];
        }

        $decoded = base64_decode($stored, true);

        return false === $decoded ? null : $decoded;
    }

    /**
     * Runs qpdf over the given PDF bytes using temporary files.
     *
     * @return string|null the bytes qpdf produced, or null when a temp file
     *                     could not be created/written/read, qpdf exited with
     *                     a non-zero status, or the output is empty
     */
    private function compressWithQpdf(string $pdfContent): ?string
    {
        $tempInput = tempnam(sys_get_temp_dir(), 'pdf_in_');
        $tempOutput = tempnam(sys_get_temp_dir(), 'pdf_out_');

        try {
            if (false === $tempInput || false === $tempOutput) {
                return null;
            }

            // file_put_contents() returns the number of bytes written; anything
            // else means qpdf would be fed a truncated file.
            if (strlen($pdfContent) !== file_put_contents($tempInput, $pdfContent)) {
                return null;
            }

            // Compress with qpdf (lossless).
            $command = sprintf(
                'qpdf --linearize --object-streams=generate %s %s 2>&1',
                escapeshellarg($tempInput),
                escapeshellarg($tempOutput)
            );

            // exec() appends to the array it is given, so start from an empty
            // one on every call instead of accumulating output across documents.
            $qpdfOutput = [];
            exec($command, $qpdfOutput, $qpdfStatus);

            if (0 !== $qpdfStatus || !is_file($tempOutput)) {
                return null;
            }

            $result = file_get_contents($tempOutput);

            // tempnam() pre-creates the output file, so an untouched file reads
            // back as ''. Never let that pass as a "smaller" PDF.
            if (false === $result || '' === $result) {
                return null;
            }

            return $result;
        } finally {
            // Best-effort cleanup on every exit path, including exceptions.
            foreach ([$tempInput, $tempOutput] as $tempFile) {
                if (is_string($tempFile)) {
                    @unlink($tempFile);
                }
            }
        }
    }

    /**
     * Formats a byte count using 1024-based units up to GB, rounded to two
     * decimals (e.g. 1536 becomes "1.5 KB").
     */
    private function formatBytes(int $bytes): string
    {
        $units = ['B', 'KB', 'MB', 'GB'];
        $lastUnit = count($units) - 1;

        // Work on a separate variable: the division turns the value into a float.
        $value = $bytes;
        $unit = 0;
        while ($value >= 1024 && $unit < $lastUnit) {
            $value /= 1024;
            ++$unit;
        }

        return round($value, 2).' '.$units[$unit];
    }
}
|