feat(transport) : synchronisation du référentiel codes IDTF (ERP-149)

Commande console app:idtf:sync : récupère l'export Excel des codes IDTF (régimes de nettoyage transport) depuis icrt-idtf.com, le parse et synchronise une table référentielle (upsert sur (schema, idtf_number) + soft-delete + journal). Scope road ; discriminant schema road/water conservé.

- migration : tables idtf_product + idtf_sync_log (COMMENT ON COLUMN sur chaque colonne, unique (schema, idtf_number), cas_numbers JSONB)
- IdtfSheetParser : parsing pur d'une matrice (détection dynamique de l'en-tête, mapping par libellé, CAS split, date dd-mm-yyyy -> ISO) + tests unitaires
- SyncIdtfCommand : options --schema / --file / --dry-run, POST avec fields[] explicites (export 11 colonnes), upsert DBAL transactionnel
- cible make idtf-sync
- tests fonctionnels via .xlsx généré (parsing/upsert/journal/soft-delete)

Réutilise framework.http_client (activé pour QUALIMAT, ERP-39). phpoffice/phpspreadsheet déjà présent.
This commit is contained in:
2026-06-12 15:49:28 +02:00
parent c8bff68373
commit abe663d355
6 changed files with 937 additions and 0 deletions
@@ -0,0 +1,219 @@
<?php
declare(strict_types=1);
namespace App\Module\Transport\Application\Idtf;
use RuntimeException;
use function array_slice;
/**
* Parsing pur d'une matrice (lignes/colonnes 0-indexees, telle que retournee
* par PhpSpreadsheet::toArray) de l'export Excel IDTF vers des lignes
* normalisees pretes a l'upsert. Sans dependance a PhpSpreadsheet : la matrice
* est un simple tableau, ce qui rend le parsing testable en isolation.
*
* Robuste au reordonnancement des colonnes (mapping par libelle normalise) et
* aux lignes de preambule (detection dynamique de la ligne d'en-tete). Voir
* ERP-149 § 2.
*/
final class IdtfSheetParser
{
/**
* @param array<int, array<int, mixed>> $matrix
*
* @return array{exportDate: null|string, rows: list<array<string, mixed>>}
*/
public static function parse(array $matrix): array
{
$exportDate = self::extractExportDate($matrix);
$headerIndex = self::findHeaderIndex($matrix);
if (null === $headerIndex) {
throw new RuntimeException("Ligne d'en-tete introuvable (colonne 'Numero IDTF').");
}
$map = self::buildColumnMap($matrix[$headerIndex]);
if (!isset($map['idtf_number'])) {
throw new RuntimeException("Colonne 'Numero IDTF' introuvable dans l'en-tete.");
}
$rows = [];
foreach (array_slice($matrix, $headerIndex + 1) as $row) {
$idtf = trim((string) ($row[$map['idtf_number']] ?? ''));
// Ligne vide / non exploitable : pas d'identifiant numerique.
if ('' === $idtf || !ctype_digit($idtf)) {
continue;
}
$rows[] = [
'idtf_number' => (int) $idtf,
'product_group' => self::val($row, $map['product_group'] ?? null),
'name' => self::val($row, $map['name'] ?? null) ?? '',
'cleaning_regime' => self::val($row, $map['cleaning_regime'] ?? null) ?? '',
'important_requirements' => self::val($row, $map['important_requirements'] ?? null),
'mandatory_date' => self::parseDate(self::val($row, $map['mandatory_date'] ?? null)),
'related_products' => self::val($row, $map['related_products'] ?? null),
'formula' => self::val($row, $map['formula'] ?? null),
'eural_code' => self::val($row, $map['eural_code'] ?? null),
'cas_numbers' => self::splitCas(self::val($row, $map['cas'] ?? null)),
'footnotes' => self::val($row, $map['footnotes'] ?? null),
];
}
return ['exportDate' => $exportDate, 'rows' => $rows];
}
/**
* Cherche une date "d-m-Y" dans les premieres lignes (preambule
* "Export date: 12-6-2026") et la convertit en "Y-m-d". Null si absente.
*
* @param array<int, array<int, mixed>> $matrix
*/
public static function extractExportDate(array $matrix): ?string
{
foreach (array_slice($matrix, 0, 5) as $row) {
$line = implode(' ', array_map(static fn (mixed $c): string => (string) $c, $row));
if (preg_match('/(\d{1,2})-(\d{1,2})-(\d{4})/', $line, $m)) {
$day = (int) $m[1];
$month = (int) $m[2];
$year = (int) $m[3];
if (checkdate($month, $day, $year)) {
return sprintf('%04d-%02d-%02d', $year, $month, $day);
}
}
}
return null;
}
/**
* Index de la ligne d'en-tete : premiere ligne contenant une cellule dont
* le libelle normalise contient "numero idtf".
*
* @param array<int, array<int, mixed>> $matrix
*/
private static function findHeaderIndex(array $matrix): ?int
{
foreach ($matrix as $i => $row) {
foreach ($row as $cell) {
if (str_contains(self::normalize((string) $cell), 'numero idtf')) {
return $i;
}
}
}
return null;
}
/**
* Construit le mapping logique -> index de colonne a partir de la ligne
* d'en-tete (resiste au reordonnancement via fields[]).
*
* @param array<int, mixed> $header
*
* @return array<string, int>
*/
private static function buildColumnMap(array $header): array
{
$map = [];
foreach ($header as $col => $label) {
$n = self::normalize((string) $label);
$key = match (true) {
str_contains($n, 'numero idtf') => 'idtf_number',
str_contains($n, 'product group'),
str_contains($n, 'groupe') => 'product_group',
str_contains($n, 'nom de la marchandise') => 'name',
str_contains($n, 'regime de nettoyage') => 'cleaning_regime',
str_contains($n, 'exigences importantes') => 'important_requirements',
str_contains($n, 'date d application') => 'mandatory_date',
str_contains($n, 'produits apparentes') => 'related_products',
str_contains($n, 'formule') => 'formula',
str_contains($n, 'code eural') => 'eural_code',
str_contains($n, 'numero cas') => 'cas',
str_contains($n, 'annotations') => 'footnotes',
default => null,
};
if (null !== $key && !isset($map[$key])) {
$map[$key] = (int) $col;
}
}
return $map;
}
/**
* Convertit une date "dd-mm-yyyy" en "yyyy-mm-dd". Null si format invalide
* ou date calendaire impossible.
*/
private static function parseDate(?string $raw): ?string
{
if (null === $raw || !preg_match('/^(\d{1,2})-(\d{1,2})-(\d{4})$/', $raw, $m)) {
return null;
}
$day = (int) $m[1];
$month = (int) $m[2];
$year = (int) $m[3];
if (!checkdate($month, $day, $year)) {
return null;
}
return sprintf('%04d-%02d-%02d', $year, $month, $day);
}
/**
* Eclate une cellule "Numero CAS" sur ';' en liste de chaines non vides.
*
* @return list<string>
*/
private static function splitCas(?string $raw): array
{
if (null === $raw) {
return [];
}
$parts = array_map('trim', explode(';', $raw));
return array_values(array_filter($parts, static fn (string $v): bool => '' !== $v));
}
/**
* Valeur d'une cellule par index : trim, null si absente/vide.
*
* @param array<int, mixed> $row
*/
private static function val(array $row, ?int $col): ?string
{
if (null === $col) {
return null;
}
$v = trim((string) ($row[$col] ?? ''));
return '' === $v ? null : $v;
}
/**
* Normalise un libelle d'en-tete : minuscules, sans accents ni apostrophes,
* espaces compresses (pour un matching robuste).
*/
private static function normalize(string $s): string
{
$s = str_replace(['', "'"], ' ', $s);
$s = (string) iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
$s = mb_strtolower($s);
return trim((string) preg_replace('/\s+/', ' ', $s));
}
}
@@ -0,0 +1,317 @@
<?php
declare(strict_types=1);
namespace App\Module\Transport\Infrastructure\Console;
use App\Module\Transport\Application\Idtf\IdtfSheetParser;
use DateTimeImmutable;
use Doctrine\DBAL\Connection;
use PhpOffice\PhpSpreadsheet\IOFactory;
use RuntimeException;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\Console\Style\SymfonyStyle;
use Symfony\Contracts\HttpClient\HttpClientInterface;
use Throwable;
use function array_slice;
use function count;
use function in_array;
use const JSON_THROW_ON_ERROR;
use const JSON_UNESCAPED_UNICODE;
/**
* ERP-149 : synchronise le referentiel des codes IDTF (regimes de nettoyage
* transport).
*
* Recupere l'export Excel depuis le generateur icrt-idtf.com (ou un fichier
* local), le parse et synchronise `idtf_product` de facon transactionnelle :
* upsert sur (schema, idtf_number), soft-delete des absents, journal dans
* `idtf_sync_log`. Idempotente (refresh complet).
*/
#[AsCommand(
name: 'app:idtf:sync',
description: 'Synchronise le referentiel des codes IDTF depuis l\'export Excel icrt-idtf.com (upsert + soft-delete + journal).',
)]
final class SyncIdtfCommand extends Command
{
private const string GENERATOR_URL = 'https://www.icrt-idtf.com/fr/excel-generator/';
/**
* Champs a cocher explicitement : `fields[]=all` ne deplie PAS les colonnes
* cote serveur (6 colonnes seulement). Cette liste donne l'export complet
* (11 colonnes). Cf. ERP-149 § 1.
*
* @var list<string>
*/
private const array EXPORT_FIELDS = [
'product_number_idtf',
'product_name',
'minimum_cleaning_regime',
'important_requirements',
'date_mandatory',
'related_products',
'formula',
'product_number_eural',
'product_number_cas',
'footnotes',
];
public function __construct(
private readonly Connection $connection,
private readonly HttpClientInterface $httpClient,
) {
parent::__construct();
}
protected function configure(): void
{
$this
->addOption('schema', null, InputOption::VALUE_REQUIRED, "Module IDTF : 'road' (routier) ou 'water' (fluvial).", 'road')
->addOption('file', null, InputOption::VALUE_REQUIRED, "Chemin d'un .xlsx local (court-circuite le telechargement, utile pour tests/rejeu).")
->addOption('dry-run', null, InputOption::VALUE_NONE, 'Analyse sans ecriture en base.')
;
}
protected function execute(InputInterface $input, OutputInterface $output): int
{
$io = new SymfonyStyle($input, $output);
$schema = (string) $input->getOption('schema');
$dryRun = (bool) $input->getOption('dry-run');
$file = $input->getOption('file');
if (!in_array($schema, ['road', 'water'], true)) {
$io->error("--schema doit valoir 'road' ou 'water'.");
return Command::INVALID;
}
// 1. Recuperation du binaire xlsx (local ou via POST).
try {
$xlsx = null !== $file ? $this->readLocal((string) $file) : $this->downloadExport($schema);
} catch (Throwable $e) {
$io->error('Telechargement/lecture impossible : '.$e->getMessage());
return Command::FAILURE;
}
// 2. Parsing (xlsx -> matrice -> lignes normalisees).
try {
$parsed = IdtfSheetParser::parse($this->toMatrix($xlsx));
} catch (Throwable $e) {
$io->error('Parsing impossible : '.$e->getMessage());
return Command::FAILURE;
}
$rows = $parsed['rows'];
$exportDate = $parsed['exportDate'] ?? new DateTimeImmutable()->format('Y-m-d');
$io->section(sprintf('IDTF %s — export du %s', mb_strtoupper($schema), $exportDate));
$io->writeln(sprintf('%d lignes exploitables lues.', count($rows)));
if ($dryRun) {
$this->renderPreview($io, $rows);
$io->note(sprintf('Dry-run : aucune ecriture. (%d lignes au total)', count($rows)));
return Command::SUCCESS;
}
// 3. Sync transactionnelle : upsert -> soft-delete -> journal.
$run = new DateTimeImmutable()->format('Y-m-d H:i:s.u');
$this->connection->beginTransaction();
try {
$upserted = $this->upsertAll($schema, $exportDate, $rows, $run);
$deactivated = $this->deactivateMissing($schema, $run);
$this->log($schema, $exportDate, count($rows), $upserted, $deactivated);
$this->connection->commit();
} catch (Throwable $e) {
$this->connection->rollBack();
$io->error('Sync annulee (rollback) : '.$e->getMessage());
return Command::FAILURE;
}
$io->success(sprintf('%d upsert, %d desactive(s).', $upserted, $deactivated));
return Command::SUCCESS;
}
/**
* Rejoue le POST du generateur pour recuperer le binaire xlsx complet.
* Le formulaire poste sur lui-meme ; pas besoin de GET/cookies prealables.
*/
private function downloadExport(string $schema): string
{
// Corps construit a la main : http-client encoderait fields[] en
// indices numerotes, on veut bien des "fields[]=..." repetes.
$pairs = [
'schema='.$schema,
'type%5B%5D='.$schema,
'roadRegime%5B%5D=all',
'waterRegime%5B%5D=all',
'groups%5B%5D=all',
'products%5B%5D=all',
];
foreach (self::EXPORT_FIELDS as $field) {
$pairs[] = 'fields%5B%5D='.$field;
}
$pairs[] = 'generateExcel=';
$response = $this->httpClient->request('POST', self::GENERATOR_URL, [
'headers' => ['Content-Type' => 'application/x-www-form-urlencoded'],
'body' => implode('&', $pairs),
'timeout' => 90,
]);
$content = $response->getContent();
$contentType = $response->getHeaders(false)['content-type'][0] ?? '';
// Garde-fou : un HTML signifie un POST rejete (filtres/payload).
if (!str_contains($contentType, 'spreadsheet') && !str_starts_with($content, "PK\x03\x04")) {
throw new RuntimeException(sprintf('Reponse non-xlsx (content-type: %s). Verifie le payload.', $contentType));
}
return $content;
}
private function readLocal(string $path): string
{
$raw = @file_get_contents($path);
if (false === $raw) {
throw new RuntimeException(sprintf('Fichier illisible : %s', $path));
}
return $raw;
}
/**
* Charge le binaire xlsx via PhpSpreadsheet et retourne la feuille active
* sous forme de matrice 0-indexee (lignes/colonnes).
*
* @return array<int, array<int, mixed>>
*/
private function toMatrix(string $xlsx): array
{
$tmp = tempnam(sys_get_temp_dir(), 'idtf_').'.xlsx';
file_put_contents($tmp, $xlsx);
try {
// toArray(null, true, true, false) : colonnes 0-indexees.
return IOFactory::load($tmp)->getActiveSheet()->toArray(null, true, true, false);
} finally {
@unlink($tmp);
}
}
/**
* Upsert de toutes les lignes (cle naturelle = schema + idtf_number).
*
* @param list<array<string, mixed>> $rows
*/
private function upsertAll(string $schema, string $exportDate, array $rows, string $run): int
{
$sql = <<<'SQL'
INSERT INTO idtf_product
(idtf_number, schema, product_group, name, cleaning_regime, important_requirements,
mandatory_date, related_products, formula, eural_code, cas_numbers, footnotes,
source_export_date, is_active, last_synced_at)
VALUES
(:idtf, :schema, :grp, :name, :regime, :req, :mdate, :related, :formula, :eural,
CAST(:cas AS JSONB), :foot, :export, TRUE, :run)
ON CONFLICT (schema, idtf_number) DO UPDATE SET
product_group = EXCLUDED.product_group,
name = EXCLUDED.name,
cleaning_regime = EXCLUDED.cleaning_regime,
important_requirements = EXCLUDED.important_requirements,
mandatory_date = EXCLUDED.mandatory_date,
related_products = EXCLUDED.related_products,
formula = EXCLUDED.formula,
eural_code = EXCLUDED.eural_code,
cas_numbers = EXCLUDED.cas_numbers,
footnotes = EXCLUDED.footnotes,
source_export_date = EXCLUDED.source_export_date,
is_active = TRUE,
last_synced_at = EXCLUDED.last_synced_at
SQL;
$count = 0;
foreach ($rows as $r) {
$this->connection->executeStatement($sql, [
'idtf' => $r['idtf_number'],
'schema' => $schema,
'grp' => $r['product_group'],
'name' => $r['name'],
'regime' => $r['cleaning_regime'],
'req' => $r['important_requirements'],
'mdate' => $r['mandatory_date'],
'related' => $r['related_products'],
'formula' => $r['formula'],
'eural' => $r['eural_code'],
'cas' => json_encode($r['cas_numbers'], JSON_UNESCAPED_UNICODE | JSON_THROW_ON_ERROR),
'foot' => $r['footnotes'],
'export' => $exportDate,
'run' => $run,
]);
++$count;
}
return $count;
}
/**
* Soft-delete : toute ligne du schema active non revue par ce run passe a
* is_active=false.
*/
private function deactivateMissing(string $schema, string $run): int
{
return (int) $this->connection->executeStatement(
'UPDATE idtf_product SET is_active = FALSE WHERE schema = :schema AND is_active = TRUE AND last_synced_at < :run',
['schema' => $schema, 'run' => $run],
);
}
private function log(string $schema, string $exportDate, int $total, int $upserted, int $deactivated): void
{
$this->connection->executeStatement(
<<<'SQL'
INSERT INTO idtf_sync_log (schema, export_date, rows_total, rows_upserted, rows_deactivated)
VALUES (:schema, :export, :total, :upserted, :deactivated)
SQL,
[
'schema' => $schema,
'export' => $exportDate,
'total' => $total,
'upserted' => $upserted,
'deactivated' => $deactivated,
],
);
}
/**
* @param list<array<string, mixed>> $rows
*/
private function renderPreview(SymfonyStyle $io, array $rows): void
{
$io->table(
['IDTF', 'Nom', 'Regime', 'CAS'],
array_map(static fn (array $r): array => [
(string) $r['idtf_number'],
mb_strimwidth((string) $r['name'], 0, 50, '…'),
(string) $r['cleaning_regime'],
implode(', ', $r['cas_numbers']),
], array_slice($rows, 0, 15)),
);
}
}