feat(documents): filesystem storage, server-side pagination and PDF compression
- Add DocumentStorageService for file-based storage (replaces Base64 in DB)
- Add DocumentServeController with /file and /download endpoints
- Add DocumentUploadProcessor using FormData + filesystem storage
- Add DocumentNormalizer exposing fileUrl/downloadUrl on all responses
- Add DocumentFileCleanupListener for automatic file deletion
- Add MigrateDocumentsToFilesystemCommand (Base64 → files, memory-safe)
- Add ApiFilter (SearchFilter, ExistsFilter, OrderFilter) on Document entity
- Add PdfCompressorService + refactor CompressPdfCommand for batch processing
- Fix TypeMachine PUT: deserialize=false + validate=false to prevent UniqueEntity false positive and writableLink collection interference
- Update CHANGELOG for v1.8.0
- Update frontend submodule

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
218
src/Command/MigrateDocumentsToFilesystemCommand.php
Normal file
218
src/Command/MigrateDocumentsToFilesystemCommand.php
Normal file
@@ -0,0 +1,218 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace App\Command;
|
||||
|
||||
use App\Repository\DocumentRepository;
|
||||
use App\Service\DocumentStorageService;
|
||||
use Doctrine\ORM\EntityManagerInterface;
|
||||
use Symfony\Component\Console\Attribute\AsCommand;
|
||||
use Symfony\Component\Console\Command\Command;
|
||||
use Symfony\Component\Console\Input\InputInterface;
|
||||
use Symfony\Component\Console\Input\InputOption;
|
||||
use Symfony\Component\Console\Output\OutputInterface;
|
||||
use Symfony\Component\Console\Style\SymfonyStyle;
|
||||
use Throwable;
|
||||
|
||||
use function count;
|
||||
use function strlen;
|
||||
|
||||
/**
 * Console command that moves document payloads stored as `data:` URIs in the
 * `documents.path` column onto the filesystem.
 *
 * For every matching row the Base64 payload is decoded, handed to
 * DocumentStorageService::store(), and the row's `path` and `size` columns are
 * rewritten with the returned relative path and the decoded byte length.
 *
 * Rows are read and updated with raw SQL, one at a time, so that only a single
 * document payload is held in memory at any moment.
 */
#[AsCommand(
    name: 'app:migrate-documents-to-filesystem',
    description: 'Migrate document storage from Base64 in DB to filesystem',
)]
class MigrateDocumentsToFilesystemCommand extends Command
{
    public function __construct(
        private readonly DocumentRepository $documentRepository,
        private readonly EntityManagerInterface $em,
        private readonly DocumentStorageService $storageService,
    ) {
        parent::__construct();
    }

    /**
     * Declares the command options: --dry-run, --batch-size and --limit.
     */
    protected function configure(): void
    {
        // NOTE(review): despite its description, --batch-size only controls how
        // often a progress line is printed; every row is updated immediately.
        $this
            ->addOption('dry-run', null, InputOption::VALUE_NONE, 'Show what would be migrated without making changes')
            ->addOption('batch-size', null, InputOption::VALUE_REQUIRED, 'Number of documents to process before flushing', '50')
            ->addOption('limit', null, InputOption::VALUE_REQUIRED, 'Max documents to migrate (for testing)', '0')
        ;
    }

    /**
     * Runs the migration.
     *
     * @return int Command::SUCCESS when no document failed, Command::FAILURE when
     *             an option is invalid, the storage directory is unusable, or at
     *             least one document could not be decoded/stored/updated
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $io = new SymfonyStyle($input, $output);
        $dryRun = (bool) $input->getOption('dry-run');
        $batchSize = (int) $input->getOption('batch-size');
        $limit = (int) $input->getOption('limit');

        $io->title('Document Storage Migration: Base64 → Filesystem');

        // A batch size of 0 (or a non-numeric value, which casts to 0) would make
        // the progress check below throw DivisionByZeroError after the first
        // document has already been migrated, so reject it up front.
        if ($batchSize < 1) {
            $io->error('Option --batch-size must be a positive integer.');

            return Command::FAILURE;
        }

        // The storage directory is only touched when changes will really be made.
        if (!$dryRun) {
            $storageDir = $this->storageService->getStorageDir();
            if (!$this->prepareStorageDirectory($io, $storageDir)) {
                return Command::FAILURE;
            }
            $io->text("Storage directory: {$storageDir}");
        }

        // Step 1: fetch only IDs of Base64 documents (no heavy path column loaded)
        $conn = $this->em->getConnection();
        $ids = $conn->fetchFirstColumn("SELECT id FROM documents WHERE path LIKE 'data:%'");
        $total = count($ids);
        $migrated = 0;
        $skipped = 0;
        $errors = 0;
        $totalBytes = 0;

        $io->text(sprintf('Found %d documents with Base64 data to migrate', $total));

        if (0 === $total) {
            $io->success('Nothing to migrate — all documents are already file-based.');

            return Command::SUCCESS;
        }

        // Step 2: process one document at a time to avoid memory exhaustion
        foreach ($ids as $index => $docId) {
            // A limit of 0 (or less) means "no limit".
            if ($limit > 0 && $migrated >= $limit) {
                $io->text("Reached limit of {$limit} documents.");

                break;
            }

            // Fetch single row with raw SQL to keep memory flat
            $row = $conn->fetchAssociative(
                'SELECT id, name, filename, path, mimetype, size FROM documents WHERE id = ?',
                [$docId]
            );

            // The row may have disappeared since the ID list was built.
            if (!$row) {
                ++$skipped;

                continue;
            }

            $path = $row['path'];
            // The row may have been converted since the ID list was built.
            if (!$this->storageService->isBase64DataUri($path)) {
                ++$skipped;

                continue;
            }

            // Display name and filename fall back on each other when one is empty.
            $docName = $row['name'] ?: $row['filename'];
            $filename = $row['filename'] ?: $row['name'];
            $mimeType = $row['mimetype'] ?? 'application/octet-stream';

            // Extract binary content from data URI: everything after the first comma.
            $parts = explode(',', $path, 2);
            $base64 = $parts[1] ?? '';
            $content = base64_decode($base64, true);

            // Free the raw row immediately; only the decoded bytes are still needed.
            unset($row, $path, $base64, $parts);

            if (false === $content || '' === $content) {
                $io->warning(sprintf('[%d/%d] Cannot decode: %s (id: %s)', $index + 1, $total, $docName, $docId));
                ++$errors;

                continue;
            }

            $fileSize = strlen($content);
            // Prefer the extension of the stored filename; otherwise derive one
            // from the MIME type via a synthetic "file.<ext>" name.
            $extension = $this->storageService->extensionFromFilename(
                $filename ?: ('file.'.$this->storageService->extensionFromMimeType($mimeType))
            );

            if ($dryRun) {
                $io->text(sprintf(
                    '  [DRY-RUN] Would migrate: %s (%s)',
                    $docName,
                    $this->formatBytes($fileSize)
                ));
                ++$migrated;
                $totalBytes += $fileSize;
                unset($content);

                continue;
            }

            try {
                $relativePath = $this->storageService->store($content, $docId, $extension);
                unset($content);

                // Update DB directly — avoid loading entity with huge path
                // NOTE(review): if this statement fails after store() succeeded,
                // the written file is presumably left on disk — confirm whether
                // a re-run overwrites it or whether cleanup is needed.
                $conn->executeStatement(
                    'UPDATE documents SET path = ?, size = ? WHERE id = ?',
                    [$relativePath, $fileSize, $docId]
                );

                ++$migrated;
                $totalBytes += $fileSize;

                $io->text(sprintf(
                    '  [OK] %s → %s (%s)',
                    $docName,
                    $relativePath,
                    $this->formatBytes($fileSize)
                ));
            } catch (Throwable $e) {
                // One failing document must not abort the whole run: report it,
                // count it, and move on to the next ID.
                unset($content);
                $io->error(sprintf(
                    '  [FAIL] %s: %s',
                    $docName,
                    $e->getMessage()
                ));
                ++$errors;

                continue;
            }

            // Periodic progress line; $batchSize is guaranteed >= 1 here.
            if (0 === $migrated % $batchSize) {
                $io->text(sprintf('  ... %d migrated so far', $migrated));
            }
        }

        $io->newLine();
        $io->table(
            ['Metric', 'Count'],
            [
                ['Total documents', (string) $total],
                ['Migrated', (string) $migrated],
                ['Skipped (already file-based)', (string) $skipped],
                ['Errors', (string) $errors],
                ['Total bytes written', $this->formatBytes($totalBytes)],
            ]
        );

        if ($dryRun) {
            $io->info('Dry run completed. No changes were made.');
        } elseif ($errors > 0) {
            $io->warning(sprintf('Migration completed with %d errors.', $errors));
        } else {
            $io->success('Migration completed successfully.');
        }

        return $errors > 0 ? Command::FAILURE : Command::SUCCESS;
    }

    /**
     * Makes sure the storage directory exists and is writable, reporting the
     * precise problem through $io when it is not.
     *
     * @return bool true when the directory can be used, false otherwise
     */
    private function prepareStorageDirectory(SymfonyStyle $io, string $storageDir): bool
    {
        // The trailing is_dir() covers the case where another process created the
        // directory between the first check and the mkdir() call.
        if (!is_dir($storageDir) && !mkdir($storageDir, 0o775, true) && !is_dir($storageDir)) {
            $io->error("Storage directory could not be created: {$storageDir}");

            return false;
        }

        if (!is_writable($storageDir)) {
            $io->error("Storage directory is not writable: {$storageDir}");

            return false;
        }

        return true;
    }

    /**
     * Formats a byte count as a human-readable string using 1024-based units
     * (B, KB, MB, GB), rounded to two decimals.
     */
    private function formatBytes(int $bytes): string
    {
        $units = ['B', 'KB', 'MB', 'GB'];
        $lastUnit = count($units) - 1;
        // Work on a float copy so the int-typed parameter is never reassigned
        // to a fractional value.
        $value = (float) $bytes;
        $i = 0;
        while ($value >= 1024 && $i < $lastUnit) {
            $value /= 1024;
            ++$i;
        }

        return round($value, 2).' '.$units[$i];
    }
}
|
||||
Reference in New Issue
Block a user