feat(documents) : filesystem storage, server-side pagination and PDF compression

- Add DocumentStorageService for file-based storage (replaces Base64 in DB)
- Add DocumentServeController with /file and /download endpoints
- Add DocumentUploadProcessor using FormData + filesystem storage
- Add DocumentNormalizer exposing fileUrl/downloadUrl on all responses
- Add DocumentFileCleanupListener for automatic file deletion
- Add MigrateDocumentsToFilesystemCommand (Base64 → files, memory-safe)
- Add ApiFilter (SearchFilter, ExistsFilter, OrderFilter) on Document entity
- Add PdfCompressorService + refactor CompressPdfCommand for batch processing
- Fix TypeMachine PUT: deserialize=false + validate=false to prevent
  UniqueEntity false positive and writableLink collection interference
- Update CHANGELOG for v1.8.0
- Update frontend submodule

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Matthieu
2026-03-03 15:18:55 +01:00
parent 7a5dd0b555
commit d89c97f0a0
14 changed files with 942 additions and 90 deletions

View File

@@ -4,7 +4,10 @@ declare(strict_types=1);
namespace App\Command;
use App\Entity\Document;
use App\Repository\DocumentRepository;
use App\Service\DocumentStorageService;
use App\Service\PdfCompressorService;
use Doctrine\ORM\EntityManagerInterface;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
@@ -13,15 +16,20 @@ use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
use function count;
use function strlen;
#[AsCommand(
name: 'app:compress-pdf',
description: 'Compress all PDF documents stored in database without quality loss',
description: 'Compress all PDF documents without quality loss',
)]
class CompressPdfCommand extends Command
{
public function __construct(
private readonly DocumentRepository $documentRepository,
private readonly EntityManagerInterface $em,
private readonly PdfCompressorService $pdfCompressor,
private readonly DocumentStorageService $storageService,
) {
parent::__construct();
}
@@ -61,87 +69,13 @@ class CompressPdfCommand extends Command
$compressed = 0;
foreach ($documents as $document) {
$base64Data = $document->getPath();
$path = $document->getPath();
// Remove data URI prefix if present
if (str_contains($base64Data, ',')) {
$base64Data = explode(',', $base64Data, 2)[1];
}
$pdfContent = base64_decode($base64Data, true);
if (false === $pdfContent) {
$io->warning(sprintf('Failed to decode document: %s', $document->getName()));
continue;
}
$originalSize = strlen($pdfContent);
if ($dryRun) {
$io->text(sprintf(
' [DRY-RUN] Would compress: %s (%s)',
$document->getName(),
$this->formatBytes($originalSize)
));
continue;
}
// Create temp files
$tempInput = tempnam(sys_get_temp_dir(), 'pdf_in_');
$tempOutput = tempnam(sys_get_temp_dir(), 'pdf_out_');
file_put_contents($tempInput, $pdfContent);
// Compress with qpdf (lossless)
$command = sprintf(
'qpdf --linearize --object-streams=generate %s %s 2>&1',
escapeshellarg($tempInput),
escapeshellarg($tempOutput)
);
exec($command, $cmdOutput, $returnCode);
if (0 !== $returnCode || !file_exists($tempOutput)) {
$io->warning(sprintf('Failed to compress: %s', $document->getName()));
@unlink($tempInput);
@unlink($tempOutput);
continue;
}
$compressedContent = file_get_contents($tempOutput);
$compressedSize = strlen($compressedContent);
// Only update if we actually saved space
if ($compressedSize < $originalSize) {
$saved = $originalSize - $compressedSize;
$totalSaved += $saved;
++$compressed;
// Rebuild base64 with data URI prefix
$newBase64 = 'data:application/pdf;base64,'.base64_encode($compressedContent);
$document->setPath($newBase64);
$document->setSize($compressedSize);
$io->text(sprintf(
' ✓ %s: %s → %s (-%s, -%.1f%%)',
$document->getName(),
$this->formatBytes($originalSize),
$this->formatBytes($compressedSize),
$this->formatBytes($saved),
($saved / $originalSize) * 100
));
if ($this->storageService->isBase64DataUri($path)) {
$this->compressBase64Document($document, $path, $dryRun, $io, $totalSaved, $compressed);
} else {
$io->text(sprintf(
' - %s: Already optimal (%s)',
$document->getName(),
$this->formatBytes($originalSize)
));
$this->compressFileDocument($document, $path, $dryRun, $io, $totalSaved, $compressed);
}
@unlink($tempInput);
@unlink($tempOutput);
}
if (!$dryRun && $compressed > 0) {
@@ -161,6 +95,115 @@ class CompressPdfCommand extends Command
return Command::SUCCESS;
}
private function compressBase64Document(
Document $document,
string $path,
bool $dryRun,
SymfonyStyle $io,
int &$totalSaved,
int &$compressed,
): void {
$base64Data = $path;
if (str_contains($base64Data, ',')) {
$base64Data = explode(',', $base64Data, 2)[1];
}
$pdfContent = base64_decode($base64Data, true);
if (false === $pdfContent) {
$io->warning(sprintf('Failed to decode document: %s', $document->getName()));
return;
}
$originalSize = strlen($pdfContent);
if ($dryRun) {
$io->text(sprintf(
' [DRY-RUN] Would compress (base64): %s (%s)',
$document->getName(),
$this->formatBytes($originalSize)
));
return;
}
$result = $this->pdfCompressor->compressBase64Pdf($path);
if (null !== $result) {
$document->setPath($result['path']);
$document->setSize($result['size']);
$totalSaved += $result['saved'];
++$compressed;
$io->text(sprintf(
' OK %s: %s → %s (-%s, -%.1f%%)',
$document->getName(),
$this->formatBytes($result['originalSize']),
$this->formatBytes($result['size']),
$this->formatBytes($result['saved']),
($result['saved'] / $result['originalSize']) * 100
));
} else {
$io->text(sprintf(
' - %s: Already optimal (%s)',
$document->getName(),
$this->formatBytes($originalSize)
));
}
}
private function compressFileDocument(
Document $document,
string $path,
bool $dryRun,
SymfonyStyle $io,
int &$totalSaved,
int &$compressed,
): void {
$absolutePath = $this->storageService->getAbsolutePath($path);
if (!file_exists($absolutePath)) {
$io->warning(sprintf('File not found: %s (%s)', $document->getName(), $path));
return;
}
$originalSize = filesize($absolutePath);
if (false === $originalSize) {
return;
}
if ($dryRun) {
$io->text(sprintf(
' [DRY-RUN] Would compress (file): %s (%s)',
$document->getName(),
$this->formatBytes($originalSize)
));
return;
}
$result = $this->pdfCompressor->compressFile($absolutePath);
if (null !== $result) {
$document->setSize($result['size']);
$totalSaved += $result['saved'];
++$compressed;
$io->text(sprintf(
' OK %s: %s → %s (-%s, -%.1f%%)',
$document->getName(),
$this->formatBytes($result['originalSize']),
$this->formatBytes($result['size']),
$this->formatBytes($result['saved']),
($result['saved'] / $result['originalSize']) * 100
));
} else {
$io->text(sprintf(
' - %s: Already optimal (%s)',
$document->getName(),
$this->formatBytes($originalSize)
));
}
}
private function formatBytes(int $bytes): string
{
$units = ['B', 'KB', 'MB', 'GB'];

View File

@@ -0,0 +1,218 @@
<?php
declare(strict_types=1);
namespace App\Command;
use App\Repository\DocumentRepository;
use App\Service\DocumentStorageService;
use Doctrine\ORM\EntityManagerInterface;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
use Throwable;
use function count;
use function strlen;
#[AsCommand(
name: 'app:migrate-documents-to-filesystem',
description: 'Migrate document storage from Base64 in DB to filesystem',
)]
class MigrateDocumentsToFilesystemCommand extends Command
{
public function __construct(
private readonly DocumentRepository $documentRepository,
private readonly EntityManagerInterface $em,
private readonly DocumentStorageService $storageService,
) {
parent::__construct();
}
protected function configure(): void
{
$this
->addOption('dry-run', null, InputOption::VALUE_NONE, 'Show what would be migrated without making changes')
->addOption('batch-size', null, InputOption::VALUE_REQUIRED, 'Number of documents to process before flushing', '50')
->addOption('limit', null, InputOption::VALUE_REQUIRED, 'Max documents to migrate (for testing)', '0')
;
}
protected function execute(InputInterface $input, OutputInterface $output): int
{
$io = new SymfonyStyle($input, $output);
$dryRun = $input->getOption('dry-run');
$batchSize = (int) $input->getOption('batch-size');
$limit = (int) $input->getOption('limit');
$io->title('Document Storage Migration: Base64 → Filesystem');
// Verify storage directory is writable
$storageDir = $this->storageService->getStorageDir();
if (!$dryRun) {
if (!is_dir($storageDir)) {
mkdir($storageDir, 0o775, true);
}
if (!is_writable($storageDir)) {
$io->error("Storage directory is not writable: {$storageDir}");
return Command::FAILURE;
}
$io->text("Storage directory: {$storageDir}");
}
// Step 1: fetch only IDs of Base64 documents (no heavy path column loaded)
$conn = $this->em->getConnection();
$ids = $conn->fetchFirstColumn("SELECT id FROM documents WHERE path LIKE 'data:%'");
$total = count($ids);
$migrated = 0;
$skipped = 0;
$errors = 0;
$totalBytes = 0;
$io->text(sprintf('Found %d documents with Base64 data to migrate', $total));
if (0 === $total) {
$io->success('Nothing to migrate — all documents are already file-based.');
return Command::SUCCESS;
}
// Step 2: process one document at a time to avoid memory exhaustion
foreach ($ids as $index => $docId) {
if ($limit > 0 && $migrated >= $limit) {
$io->text("Reached limit of {$limit} documents.");
break;
}
// Fetch single row with raw SQL to keep memory flat
$row = $conn->fetchAssociative(
'SELECT id, name, filename, path, mimetype, size FROM documents WHERE id = ?',
[$docId]
);
if (!$row) {
++$skipped;
continue;
}
$path = $row['path'];
if (!$this->storageService->isBase64DataUri($path)) {
++$skipped;
continue;
}
$docName = $row['name'] ?: $row['filename'];
$filename = $row['filename'] ?: $row['name'];
$mimeType = $row['mimetype'] ?? 'application/octet-stream';
// Extract binary content from data URI
$parts = explode(',', $path, 2);
$base64 = $parts[1] ?? '';
$content = base64_decode($base64, true);
// Free the raw row immediately
unset($row, $path, $base64, $parts);
if (false === $content || '' === $content) {
$io->warning(sprintf('[%d/%d] Cannot decode: %s (id: %s)', $index + 1, $total, $docName, $docId));
++$errors;
continue;
}
$fileSize = strlen($content);
$extension = $this->storageService->extensionFromFilename(
$filename ?: ('file.'.$this->storageService->extensionFromMimeType($mimeType))
);
if ($dryRun) {
$io->text(sprintf(
' [DRY-RUN] Would migrate: %s (%s)',
$docName,
$this->formatBytes($fileSize)
));
++$migrated;
$totalBytes += $fileSize;
unset($content);
continue;
}
try {
$relativePath = $this->storageService->store($content, $docId, $extension);
unset($content);
// Update DB directly — avoid loading entity with huge path
$conn->executeStatement(
'UPDATE documents SET path = ?, size = ? WHERE id = ?',
[$relativePath, $fileSize, $docId]
);
++$migrated;
$totalBytes += $fileSize;
$io->text(sprintf(
' [OK] %s → %s (%s)',
$docName,
$relativePath,
$this->formatBytes($fileSize)
));
} catch (Throwable $e) {
unset($content);
$io->error(sprintf(
' [FAIL] %s: %s',
$docName,
$e->getMessage()
));
++$errors;
continue;
}
if (0 === $migrated % $batchSize) {
$io->text(sprintf(' ... %d migrated so far', $migrated));
}
}
$io->newLine();
$io->table(
['Metric', 'Count'],
[
['Total documents', (string) $total],
['Migrated', (string) $migrated],
['Skipped (already file-based)', (string) $skipped],
['Errors', (string) $errors],
['Total bytes written', $this->formatBytes($totalBytes)],
]
);
if ($dryRun) {
$io->info('Dry run completed. No changes were made.');
} elseif ($errors > 0) {
$io->warning(sprintf('Migration completed with %d errors.', $errors));
} else {
$io->success('Migration completed successfully.');
}
return $errors > 0 ? Command::FAILURE : Command::SUCCESS;
}
private function formatBytes(int $bytes): string
{
$units = ['B', 'KB', 'MB', 'GB'];
$i = 0;
while ($bytes >= 1024 && $i < count($units) - 1) {
$bytes /= 1024;
++$i;
}
return round($bytes, 2).' '.$units[$i];
}
}