Files
Starseed/docs/migration/build_tiers_xlsx.py
T
Matthieu ca79b8f8e6
Pull Request — Quality gate / Backend (PHP CS + PHPUnit) (pull_request) Failing after 34s
Pull Request — Quality gate / Frontend (lint + Vitest + build) (pull_request) Successful in 1m20s
chore(migration) : outils d'extraction des tiers Mixgraine (WIP)
Boite a outils de migration des tiers (clients / fournisseurs / prestataires)
depuis l'ancien CRM Mixgraine vers Starseed :

- extract_mixgraine.py : extraction + normalisation via l'API Mixgraine (cache
  disque reprenable, debit ~1 req/s, backoff 429/5xx) -> JSON format Starseed
- build_tiers_xlsx.py  : Excel de relecture (1 onglet par type + Synthese,
  colonne 'Site manquant' filtrable)
- run.sh               : enchaine extraction + Excel
- README.md            : prerequis, recuperation du token, lancement
- mixgraine-migration-analysis.md : analyse + mapping des champs Mixgraine -> Starseed

WIP : les commandes d'import Symfony cote Starseed (seed referentiels/sites,
import Client/Supplier/Provider, 2e passe distributeur/courtier) restent a faire.

Le dossier de sortie mixgraine-export/ (IBAN/BIC + PII reelles) est volontairement
.gitignore : reproductible localement via MIXGRAINE_JWT.
2026-06-17 08:38:23 +02:00

202 lines
7.3 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Construit UN classeur Excel de relecture a partir des JSON produits par
extract_mixgraine.py, avec UN ONGLET PAR TYPE :
- Clients
- Fournisseurs
- Prestataires
- Synthese (compteurs)
Chaque onglet liste TOUTES les donnees, une ligne par adresse (les colonnes du
tiers sont repetees). Une colonne « Site manquant » (OUI / vide) + le filtre
automatique Excel permettent de trier en un clic les adresses sans site
(obligatoire cote Starseed — RG-1.10 / 2.06 / 3.03). Les lignes a probleme sont
surlignees en rouge clair.
Usage :
pip install openpyxl
python3 build_tiers_xlsx.py --in mixgraine-export
"""
import argparse
import json
import os
import sys
from collections import Counter
try:
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment
from openpyxl.utils import get_column_letter
except ImportError:
sys.exit("Dependance manquante : pip install openpyxl")
# Backward compatibility: the JSON files generated so far carry the key
# "lauttreeId"; later extractions will carry "mixgraineId".
def tid(t):
    """Return the identifier stored on a tiers record.

    The value under "mixgraineId" wins when it is truthy; otherwise the value
    under "lauttreeId" is returned (``None`` when that key is absent too).
    """
    current = t.get("mixgraineId")
    if current:
        return current
    return t.get("lauttreeId")
def address_problems(addr, target):
    """List the review problems found on a single address.

    Args:
        addr: address dict; the keys read are "sites", "postalCode", "city",
            "street", "isBilling" and "billingEmail".
        target: tiers type label; the billing-email check only applies when
            it equals "Client".

    Returns:
        A list of problem labels, empty when nothing is wrong. Labels appear
        in a fixed order: site, postal code, city, street, billing email.
    """
    required_fields = (
        ("sites", "aucun site"),
        ("postalCode", "code postal absent/invalide"),
        ("city", "ville absente"),
        ("street", "rue absente"),
    )
    issues = [label for key, label in required_fields if not addr.get(key)]

    is_client = target == "Client"
    if is_client and addr.get("isBilling") and not addr.get("billingEmail"):
        issues.append("facturation sans email")
    return issues
def tiers_problems(t, target):
    """List the review problems found at the tiers level.

    Args:
        t: tiers dict; the keys read are "companyName", "paymentType",
            "bank", "ribs", "sites" and "categories".
        target: tiers type label; the missing-site check only applies when
            it equals "Provider".

    Returns:
        A list of problem labels in a fixed order, empty when nothing is
        wrong.
    """
    payment = t.get("paymentType")
    # Each entry pairs "is this check failing?" with the label to report.
    checks = (
        (not t.get("companyName"), "nom absent"),
        (payment == "VIREMENT" and not t.get("bank"), "VIREMENT sans banque"),
        (payment == "LCR" and not t.get("ribs"), "LCR sans RIB"),
        (target == "Provider" and not t.get("sites"), "prestataire sans site"),
        (t.get("categories") == ["A QUALIFIER"], "categorie a qualifier"),
    )
    return [label for failed, label in checks if failed]
# Columns shared by every sheet (tiers level). The "Sites prestataire" column
# exists ONLY in the Prestataires sheet (there the site is carried by the tiers
# itself, RG-3.03). For Clients and Fournisseurs the site lives solely at the
# address level ("Sites adresse").
TIERS_COLS = [
    "Réf.",
    "Société",
    "Catégories",
    "Mode paiement",
    "Banque",
    "N° TVA",
    "N° compte",
    "Distributeur",
    "Courtier",
    "Nb contacts",
    "Nb RIB",
]

# Address-level columns, appended after the tiers columns on every sheet.
ADDR_COLS = [
    "Rue",
    "Complément",
    "Code postal",
    "Ville",
    "Pays",
    "Sites adresse",
    "Facturation",
    "Email facturation",
    "Site manquant",
    "Problèmes tiers",
]
def headers_for(target):
    """Return the header row (a new list) for the sheet of the given type.

    The "Sites prestataire" column is inserted between the tiers columns and
    the address columns only when *target* equals "Provider".
    """
    provider_only = ["Sites prestataire"] if target == "Provider" else []
    return [*TIERS_COLS, *provider_only, *ADDR_COLS]
# Header row: solid dark-blue background with bold white text.
HEADER_FILL = PatternFill(patternType="solid", fgColor="222783")
HEADER_FONT = Font(color="FFFFFF", bold=True)
# Light red: marks a problem.
BAD_FILL = PatternFill(patternType="solid", fgColor="FCE4E4")
# Light orange: marks a missing site.
WARN_FILL = PatternFill(patternType="solid", fgColor="FFF4D6")
def row_for(t, target, addr):
    """Build the cell values of one sheet row (one tiers + one address).

    Args:
        t: tiers dict as loaded from the JSON export.
        target: tiers type label; "Provider" adds the tiers-level sites cell.
        addr: address dict, or ``None`` when the tiers has no address at all.

    Returns:
        A list of values laid out as: the tiers columns, the optional
        provider sites cell, then ten address-level cells ending with the
        "Site manquant" marker and the joined tiers problems.
    """
    pbt = " ; ".join(tiers_problems(t, target))
    base = [
        tid(t), t.get("companyName"), ", ".join(t.get("categories") or []),
        t.get("paymentType") or "", t.get("bank") or "", t.get("nTva") or "",
        t.get("accountNumber") or "", t.get("distributorName") or "",
        t.get("brokerName") or "",
        # `or []` instead of a .get() default: the default only applies to a
        # MISSING key, so a key present with a JSON null would reach
        # len(None) and raise TypeError.
        len(t.get("contacts") or []), len(t.get("ribs") or []),
    ]
    if target == "Provider":
        base.append(", ".join(t.get("sites") or []))
    if addr is None:
        # Eight blank address cells, then the marker shown in the
        # "Site manquant" column, then the tiers problems.
        return base + [""] * 8 + ["(pas d'adresse)", pbt]
    site_missing = "OUI" if not addr.get("sites") else ""
    return base + [
        addr.get("street") or "", addr.get("streetComplement") or "",
        addr.get("postalCode") or "", addr.get("city") or "",
        addr.get("country") or "", ", ".join(addr.get("sites") or []),
        "oui" if addr.get("isBilling") else "", addr.get("billingEmail") or "",
        site_missing, pbt,
    ]
def build_sheet(wb, title, data, target):
    """Add one review sheet to *wb* and return the number of data rows.

    One row is written per (tiers, address) pair; a tiers without addresses
    still gets a single row. The sheet gets a styled header, an auto-filter
    over all columns, a frozen header row and column widths sized from the
    first 300 rows (clamped to 10..50).

    Args:
        wb: openpyxl workbook receiving the new sheet.
        title: sheet title.
        data: list of tiers dicts.
        target: tiers type label passed on to the row/problem helpers.

    Returns:
        The number of data rows written (header excluded).
    """
    headers = headers_for(target)
    site_col = headers.index("Site manquant") + 1
    prob_col = len(headers)  # the problems column is always the last one
    ws = wb.create_sheet(title=title)
    ws.append(headers)
    for c in range(1, len(headers) + 1):
        cell = ws.cell(row=1, column=c)
        cell.fill = HEADER_FILL
        cell.font = HEADER_FONT
        cell.alignment = Alignment(vertical="center")
    rows = []
    for t in data:
        for a in (t.get("addresses") or [None]):
            rows.append((row_for(t, target, a), address_problems(a, target) if a else ["aucune adresse"]))
    # The header occupies row 1, so data rows start at 2. Tracking the index
    # here avoids reading ws.max_row per row, which rescans every stored cell
    # and made this loop quadratic.
    for r, (values, pbs) in enumerate(rows, start=2):
        ws.append(values)
        if values[site_col - 1] == "OUI":
            ws.cell(row=r, column=site_col).fill = WARN_FILL
        # Highlight when the address has problems or the tiers-problem cell
        # is non-empty.
        if pbs or values[-1]:
            ws.cell(row=r, column=prob_col).fill = BAD_FILL
    # Filter + frozen header row + column widths.
    ws.auto_filter.ref = f"A1:{get_column_letter(len(headers))}{ws.max_row}"
    ws.freeze_panes = "A2"
    for c, h in enumerate(headers, 1):
        sample = [len(str(h))] + [len(str(v[c - 1])) for v, _ in rows[:300]]
        ws.column_dimensions[get_column_letter(c)].width = min(max(max(sample) + 2, 10), 50)
    return len(rows)
def main():
    """Command-line entry point: build the review workbook from the JSON files.

    Reads clients.json, suppliers.json and providers.json from the ``--in``
    directory (a missing file counts as an empty list), writes one sheet per
    type plus a "Synthèse" sheet placed first, saves the workbook to ``--out``
    (default: ``<in>/mixgraine-tiers.xlsx``) and prints a summary table.
    """
    ap = argparse.ArgumentParser(description="Excel par type (Clients/Fournisseurs/Prestataires) + filtre site manquant")
    ap.add_argument("--in", dest="indir", default="mixgraine-export", help="dossier des JSON")
    ap.add_argument("--out", default=None, help="chemin du xlsx (defaut: <in>/mixgraine-tiers.xlsx)")
    args = ap.parse_args()

    def load(name):
        """Load one JSON file from the input directory, or [] when absent."""
        path = os.path.join(args.indir, name)
        if not os.path.exists(path):
            return []
        # Context manager so the file handle is closed deterministically.
        with open(path, encoding="utf-8") as fh:
            return json.load(fh)

    sources = [
        ("Clients", load("clients.json"), "Client"),
        ("Fournisseurs", load("suppliers.json"), "Supplier"),
        ("Prestataires", load("providers.json"), "Provider"),
    ]
    wb = Workbook()
    # Drop the default sheet created by Workbook(); only our sheets remain.
    wb.remove(wb.active)
    summary = []
    for title, data, target in sources:
        n_rows = build_sheet(wb, title, data, target)
        n_tiers = len(data)
        n_addr_missing = sum(1 for t in data for a in (t.get("addresses") or []) if not a.get("sites"))
        n_nocat = sum(1 for t in data if t.get("categories") == ["A QUALIFIER"])
        summary.append((title, n_tiers, n_rows, n_addr_missing, n_nocat))
    # Summary sheet.
    ws = wb.create_sheet(title="Synthèse")
    sh = ["Type", "Tiers", "Lignes (adresses)", "Adresses sans site", "Sans catégorie"]
    ws.append(sh)
    for c in range(1, len(sh) + 1):
        ws.cell(row=1, column=c).fill = HEADER_FILL
        ws.cell(row=1, column=c).font = HEADER_FONT
    for r in summary:
        ws.append(list(r))
    for c in range(1, len(sh) + 1):
        ws.column_dimensions[get_column_letter(c)].width = 20
    # Move the summary (created last) to the first position.
    wb.move_sheet("Synthèse", -(len(wb.sheetnames) - 1))
    out = args.out or os.path.join(args.indir, "mixgraine-tiers.xlsx")
    wb.save(out)
    print(f"Ecrit : {out}")
    print(f"{'Type':14}{'Tiers':>7}{'Lignes':>8}{'AdrSansSite':>13}{'SansCat':>9}")
    for title, n_tiers, n_rows, n_miss, n_nocat in summary:
        print(f"{title:14}{n_tiers:7}{n_rows:8}{n_miss:13}{n_nocat:9}")
# Run the CLI only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()