chore(migration) : outils d'extraction des tiers Mixgraine (WIP)
Boite a outils de migration des tiers (clients / fournisseurs / prestataires) depuis l'ancien CRM Mixgraine vers Starseed : - extract_mixgraine.py : extraction + normalisation via l'API Mixgraine (cache disque reprenable, debit ~1 req/s, backoff 429/5xx) -> JSON format Starseed - build_tiers_xlsx.py : Excel de relecture (1 onglet par type + Synthese, colonne 'Site manquant' filtrable) - run.sh : enchaine extraction + Excel - README.md : prerequis, recuperation du token, lancement - mixgraine-migration-analysis.md : analyse + mapping des champs Mixgraine -> Starseed WIP : les commandes d'import Symfony cote Starseed (seed referentiels/sites, import Client/Supplier/Provider, 2e passe distributeur/courtier) restent a faire. Le dossier de sortie mixgraine-export/ (IBAN/BIC + PII reelles) est volontairement .gitignore : reproductible localement via MIXGRAINE_JWT.
This commit is contained in:
@@ -0,0 +1,201 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Construit UN classeur Excel de relecture a partir des JSON produits par
|
||||
extract_mixgraine.py, avec UN ONGLET PAR TYPE :
|
||||
|
||||
- Clients
|
||||
- Fournisseurs
|
||||
- Prestataires
|
||||
- Synthese (compteurs)
|
||||
|
||||
Chaque onglet liste TOUTES les donnees, une ligne par adresse (les colonnes du
|
||||
tiers sont repetees). Une colonne « Site manquant » (OUI / vide) + le filtre
|
||||
automatique Excel permettent de trier en un clic les adresses sans site
|
||||
(obligatoire cote Starseed — RG-1.10 / 2.06 / 3.03). Les lignes a probleme sont
|
||||
surlignees en rouge clair.
|
||||
|
||||
Usage :
|
||||
pip install openpyxl
|
||||
python3 build_tiers_xlsx.py --in mixgraine-export
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from collections import Counter
|
||||
|
||||
try:
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.styles import Font, PatternFill, Alignment
|
||||
from openpyxl.utils import get_column_letter
|
||||
except ImportError:
|
||||
sys.exit("Dependance manquante : pip install openpyxl")
|
||||
|
||||
|
||||
# Backward compatibility: the JSON files generated so far carry the
# "lauttreeId" key; later extractions will carry "mixgraineId".
def tid(t):
    """Return the identifier of tiers record *t*.

    The value stored under "mixgraineId" wins when it is truthy; otherwise the
    value stored under "lauttreeId" is returned (``None`` if that key is
    missing as well).
    """
    current_id = t.get("mixgraineId")
    if current_id:
        return current_id
    return t.get("lauttreeId")
|
||||
|
||||
|
||||
def address_problems(addr, target):
    """List the review problems found on one address.

    Args:
        addr: address dict; the keys read are "sites", "postalCode", "city",
            "street", "isBilling" and "billingEmail".
        target: tiers type; the billing-email check only applies to "Client".

    Returns:
        A list of problem labels, empty when nothing is wrong. Labels always
        come out in the same order: site, postal code, city, street, billing.
    """
    # Each of these fields must hold a truthy value.
    required_fields = (
        ("sites", "aucun site"),
        ("postalCode", "code postal absent/invalide"),
        ("city", "ville absente"),
        ("street", "rue absente"),
    )
    found = [label for key, label in required_fields if not addr.get(key)]

    # A client billing address needs an email to send invoices to.
    if target == "Client" and addr.get("isBilling") and not addr.get("billingEmail"):
        found.append("facturation sans email")
    return found
|
||||
|
||||
|
||||
def tiers_problems(t, target):
    """List the review problems found on one tiers record.

    Args:
        t: tiers dict; the keys read are "companyName", "paymentType", "bank",
            "ribs", "sites" and "categories".
        target: tiers type; the site check only applies to "Provider".

    Returns:
        A list of problem labels, empty when nothing is wrong.
    """
    payment = t.get("paymentType")
    # (condition, label) pairs, evaluated in the order they are reported.
    checks = (
        (not t.get("companyName"), "nom absent"),
        (payment == "VIREMENT" and not t.get("bank"), "VIREMENT sans banque"),
        (payment == "LCR" and not t.get("ribs"), "LCR sans RIB"),
        (target == "Provider" and not t.get("sites"), "prestataire sans site"),
        (t.get("categories") == ["A QUALIFIER"], "categorie a qualifier"),
    )
    return [label for failed, label in checks if failed]
|
||||
|
||||
|
||||
# Columns shared by every sheet (tiers level). The "Sites prestataire" column
# exists ONLY in the Prestataires sheet (there the site is carried by the
# tiers itself, RG-3.03). For Clients and Fournisseurs the site lives at the
# address level only ("Sites adresse").
TIERS_COLS = [
    "Réf.",
    "Société",
    "Catégories",
    "Mode paiement",
    "Banque",
    "N° TVA",
    "N° compte",
    "Distributeur",
    "Courtier",
    "Nb contacts",
    "Nb RIB",
]

# Address-level columns, appended after the tiers columns on every sheet.
ADDR_COLS = [
    "Rue",
    "Complément",
    "Code postal",
    "Ville",
    "Pays",
    "Sites adresse",
    "Facturation",
    "Email facturation",
    "Site manquant",
    "Problèmes tiers",
]
|
||||
|
||||
|
||||
def headers_for(target):
    """Return the header row for the sheet of tiers type *target*.

    The result is a fresh list: the tiers columns, then "Sites prestataire"
    when *target* is "Provider", then the address columns.
    """
    provider_only = ["Sites prestataire"] if target == "Provider" else []
    return [*TIERS_COLS, *provider_only, *ADDR_COLS]
|
||||
|
||||
|
||||
# Cell styles shared by all sheets: header row, then the two highlight fills.
HEADER_FILL = PatternFill("solid", fgColor="222783")
HEADER_FONT = Font(bold=True, color="FFFFFF")
BAD_FILL = PatternFill("solid", fgColor="FCE4E4")  # light red (problem)
WARN_FILL = PatternFill("solid", fgColor="FFF4D6")  # light orange (missing site)
|
||||
|
||||
|
||||
def row_for(t, target, addr):
    """Build the cell values of one worksheet row.

    Args:
        t: tiers dict.
        target: tiers type; "Provider" adds the tiers-level sites cell.
        addr: address dict, or ``None`` when the tiers has no address.

    Returns:
        A list of values in the same order as ``headers_for(target)``: the
        tiers cells, the optional provider sites cell, then the address cells.
        The last cell holds the tiers-level problems joined with " ; ".
    """
    pbt = " ; ".join(tiers_problems(t, target))
    base = [
        tid(t),
        t.get("companyName"),
        ", ".join(t.get("categories") or []),
        t.get("paymentType") or "",
        t.get("bank") or "",
        t.get("nTva") or "",
        t.get("accountNumber") or "",
        t.get("distributorName") or "",
        t.get("brokerName") or "",
        # `or []` rather than a .get default: the key may be present with a
        # JSON null, and len(None) would raise TypeError.
        len(t.get("contacts") or []),
        len(t.get("ribs") or []),
    ]
    if target == "Provider":
        base.append(", ".join(t.get("sites") or []))

    if addr is None:
        # Eight blank address cells, then the marker in the "Site manquant"
        # column, then the tiers problems.
        return base + ["", "", "", "", "", "", "", "", "(pas d'adresse)", pbt]

    site_missing = "OUI" if not addr.get("sites") else ""
    return base + [
        addr.get("street") or "",
        addr.get("streetComplement") or "",
        addr.get("postalCode") or "",
        addr.get("city") or "",
        addr.get("country") or "",
        ", ".join(addr.get("sites") or []),
        "oui" if addr.get("isBilling") else "",
        addr.get("billingEmail") or "",
        site_missing,
        pbt,
    ]
|
||||
|
||||
|
||||
def build_sheet(wb, title, data, target):
    """Add one review sheet to *wb* and return its number of data rows.

    The sheet gets a styled header row, one row per address of each tiers (a
    single row when the tiers has no address), an auto-filter over the whole
    table, a frozen header row and column widths sized from the content.

    Args:
        wb: openpyxl workbook that receives the new sheet.
        title: sheet title.
        data: list of tiers dicts.
        target: tiers type, forwarded to the header / row / problem helpers.

    Returns:
        The number of data rows written (header excluded).
    """
    headers = headers_for(target)
    site_col = headers.index("Site manquant") + 1
    prob_col = len(headers)  # the problems column is always the last one

    ws = wb.create_sheet(title=title)
    ws.append(headers)
    for c in range(1, len(headers) + 1):
        cell = ws.cell(row=1, column=c)
        cell.fill = HEADER_FILL
        cell.font = HEADER_FONT
        cell.alignment = Alignment(vertical="center")

    # One (cell values, address problems) pair per output row.
    rows = [
        (row_for(t, target, a), address_problems(a, target) if a else ["aucune adresse"])
        for t in data
        for a in (t.get("addresses") or [None])
    ]

    # The header is row 1, so data rows start at 2. Taking the index from
    # enumerate avoids reading ws.max_row per row, which openpyxl recomputes
    # from every stored cell and which made this loop quadratic.
    for r, (values, pbs) in enumerate(rows, start=2):
        ws.append(values)
        if values[site_col - 1] == "OUI":
            ws.cell(row=r, column=site_col).fill = WARN_FILL
        if pbs or values[-1]:
            ws.cell(row=r, column=prob_col).fill = BAD_FILL

    # Auto-filter + frozen header row + column widths.
    last_row = len(rows) + 1
    ws.auto_filter.ref = f"A1:{get_column_letter(len(headers))}{last_row}"
    ws.freeze_panes = "A2"
    for c, h in enumerate(headers, 1):
        # Width from the header and the first 300 rows, clamped to [10, 50].
        sample = [len(str(h))] + [len(str(v[c - 1])) for v, _ in rows[:300]]
        ws.column_dimensions[get_column_letter(c)].width = min(max(max(sample) + 2, 10), 50)
    return len(rows)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: read the JSON exports and write the workbook.

    Reads clients.json, suppliers.json and providers.json from the ``--in``
    directory, builds one sheet per type plus a summary sheet placed first,
    saves the workbook (``--out``, default ``<in>/mixgraine-tiers.xlsx``) and
    prints the counters to stdout.
    """
    ap = argparse.ArgumentParser(description="Excel par type (Clients/Fournisseurs/Prestataires) + filtre site manquant")
    ap.add_argument("--in", dest="indir", default="mixgraine-export", help="dossier des JSON")
    ap.add_argument("--out", default=None, help="chemin du xlsx (defaut: <in>/mixgraine-tiers.xlsx)")
    args = ap.parse_args()

    def load(name):
        """Return the parsed JSON file *name* from the input dir, or []."""
        path = os.path.join(args.indir, name)
        if not os.path.exists(path):
            # Still best-effort (empty sheet), but say so: a silently empty
            # sheet is easy to misread as "no tiers of this type".
            print(f"Attention : {path} introuvable, onglet vide", file=sys.stderr)
            return []
        # Context manager so the file handle is closed deterministically.
        with open(path, encoding="utf-8") as fh:
            return json.load(fh)

    sources = [
        ("Clients", load("clients.json"), "Client"),
        ("Fournisseurs", load("suppliers.json"), "Supplier"),
        ("Prestataires", load("providers.json"), "Provider"),
    ]

    wb = Workbook()
    wb.remove(wb.active)  # drop the default empty sheet
    summary = []
    for title, data, target in sources:
        n_rows = build_sheet(wb, title, data, target)
        n_tiers = len(data)
        n_addr_missing = sum(1 for t in data for a in (t.get("addresses") or []) if not a.get("sites"))
        n_nocat = sum(1 for t in data if t.get("categories") == ["A QUALIFIER"])
        summary.append((title, n_tiers, n_rows, n_addr_missing, n_nocat))

    # Summary sheet
    ws = wb.create_sheet(title="Synthèse")
    sh = ["Type", "Tiers", "Lignes (adresses)", "Adresses sans site", "Sans catégorie"]
    ws.append(sh)
    for c in range(1, len(sh) + 1):
        ws.cell(row=1, column=c).fill = HEADER_FILL
        ws.cell(row=1, column=c).font = HEADER_FONT
    for r in summary:
        ws.append(list(r))
    for c in range(1, len(sh) + 1):
        ws.column_dimensions[get_column_letter(c)].width = 20
    # Move the summary from the last position to the first one.
    wb.move_sheet("Synthèse", -(len(wb.sheetnames) - 1))

    out = args.out or os.path.join(args.indir, "mixgraine-tiers.xlsx")
    wb.save(out)
    print(f"Ecrit : {out}")
    print(f"{'Type':14}{'Tiers':>7}{'Lignes':>8}{'AdrSansSite':>13}{'SansCat':>9}")
    for title, n_tiers, n_rows, n_miss, n_nocat in summary:
        print(f"{title:14}{n_tiers:7}{n_rows:8}{n_miss:13}{n_nocat:9}")
|
||||
|
||||
|
||||
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user