Files
Starseed/docs/migration/extract_mixgraine.py
T
Matthieu ca79b8f8e6
Pull Request — Quality gate / Backend (PHP CS + PHPUnit) (pull_request) Failing after 34s
Pull Request — Quality gate / Frontend (lint + Vitest + build) (pull_request) Successful in 1m20s
chore(migration) : outils d'extraction des tiers Mixgraine (WIP)
Boite a outils de migration des tiers (clients / fournisseurs / prestataires)
depuis l'ancien CRM Mixgraine vers Starseed :

- extract_mixgraine.py : extraction + normalisation via l'API Mixgraine (cache
  disque reprenable, debit ~1 req/s, backoff 429/5xx) -> JSON format Starseed
- build_tiers_xlsx.py  : Excel de relecture (1 onglet par type + Synthese,
  colonne 'Site manquant' filtrable)
- run.sh               : enchaine extraction + Excel
- README.md            : prerequis, recuperation du token, lancement
- mixgraine-migration-analysis.md : analyse + mapping des champs Mixgraine -> Starseed

WIP : les commandes d'import Symfony cote Starseed (seed referentiels/sites,
import Client/Supplier/Provider, 2e passe distributeur/courtier) restent a faire.

Le dossier de sortie mixgraine-export/ (IBAN/BIC + PII reelles) est volontairement
.gitignore : reproductible localement via MIXGRAINE_JWT.
2026-06-17 08:38:23 +02:00

497 lines
19 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Extraction + normalisation des tiers (clients / fournisseurs / prestataires)
depuis le CRM Mixgraine (https://liot.mixsuite.fr) vers le format des entites
Client / Supplier / Provider de Starseed.
Principe :
1. Pagine GET /api/customer/?...&page=N pour collecter tous les id.
2. Pour chaque id, recupere la fiche COMPLETE via
PUT /api/customer/{id} body {"__data": true}
(c'est l'appel que fait le front pour PRECHARGER le formulaire d'edition :
il NE MODIFIE RIEN, il renvoie le schema + les valeurs courantes).
3. Resout les selects (paymentType, banque, pays, distributeur, sites...) via
le schema renvoye, puis normalise chaque tiers au format Starseed.
4. Ecrit clients.json, suppliers.json, providers.json, referentials.json + un rapport (extraction-report.txt).
Caracteristiques :
- Zero dependance (stdlib uniquement).
- Cache disque par id (reprise apres interruption, pas de refetch).
- Debit volontairement lent (--delay) pour ne pas saturer le serveur.
- Backoff automatique sur erreur reseau / 429 / 5xx.
Usage :
export MIXGRAINE_JWT="eyJ0eXAi..." # ton token Bearer (NE PAS committer)
python3 extract_mixgraine.py # extraction complete
python3 extract_mixgraine.py --delay 2.0 # encore plus doux (defaut : 1.0)
python3 extract_mixgraine.py --limit-ids 20 # test rapide sur 20 tiers
Le JWT est un secret de session : passe-le par variable d'environnement,
ne l'ecris jamais en dur ici.
"""
import argparse
import json
import os
import re
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
# Root URL of the Mixgraine instance; override it with MIXGRAINE_BASE.
# Trailing slashes are dropped because every request path is appended as
# "/api/...": a base ending in "/" would otherwise produce "//api/...".
BASE = os.environ.get("MIXGRAINE_BASE", "https://liot.mixsuite.fr").rstrip("/")
# Bearer token sent with every request. MIXGRAINE_JWT is read first, then
# LAUTTREE_JWT as a fallback variable name; empty string when neither is set.
JWT = os.environ.get("MIXGRAINE_JWT") or os.environ.get("LAUTTREE_JWT", "")
# --- Mixgraine label -> Starseed referential code lookup tables ---------------

# VAT regime label -> Starseed code.
TVA_MODE = {
    "France (ventes)": "FRANCE_VENTES",
    "Export (ventes)": "EXPORT_VENTES",
    "Intracom (ventes)": "INTRACOM_VENTES",
    # No "achats" (purchases) mode exists in the seed data -> still to be decided.
    "France (achats)": "FRANCE_VENTES",
}

# Payment term label -> Starseed code (accented and unaccented spellings).
PAYMENT_DELAY = {
    "15 jours": "J15",
    # Missing from the Starseed seed -> has to be created.
    "20 jours": "J20",
    "30 jours": "J30",
    "A reception": "A_RECEPTION",
    "A réception": "A_RECEPTION",
}

# Payment method label -> Starseed code (accented and unaccented spellings).
PAYMENT_TYPE = {
    # Not mapped to LCR: bank details (RIB) are not always available (RG-1.13).
    "LCR non soumise": "NON_SOUMISE",
    "Virement": "VIREMENT",
    "Cheque": "CHEQUE",
    "Chèque": "CHEQUE",
}

# Bank label -> Starseed code.
BANK = {
    "CIC": "CIC",
    "SOCIETE GENERALE": "SG",
    "CREDIT AGRICOLE": "CA",
}

# Civility prefixes stripped from contact names. Order matters: the longer
# spellings ("M.", "Mr") come before the bare "M" they start with.
CIVILITES = (
    "Mme",
    "Mlle",
    "Mle",
    "M.",
    "Mr",
    "M",
)
# --- Petites fonctions utilitaires -------------------------------------------
def http(method, path, body=None, tries=5):
    """Call the Mixgraine API with retry/backoff and return the decoded JSON.

    Args:
        method: HTTP verb, e.g. "GET" or "PUT".
        path: path (and query string) appended to BASE.
        body: optional object sent as a JSON request body.
        tries: maximum number of attempts.

    Returns:
        The response body decoded from UTF-8 JSON.

    Raises:
        urllib.error.HTTPError: non-retryable status, or retries exhausted.
        OSError: network failure once the retries are exhausted.
        RuntimeError: no attempt was made at all (tries < 1).
    """
    url = BASE + path
    data = json.dumps(body).encode("utf-8") if body is not None else None
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Authorization": "Bearer " + JWT,
    }
    if data is not None:
        headers["Content-Type"] = "application/json"
    delay = 2.0  # first pause; doubled after each failed attempt
    for attempt in range(1, tries + 1):
        req = urllib.request.Request(url, data=data, headers=headers, method=method)
        try:
            with urllib.request.urlopen(req, timeout=60) as resp:
                return json.loads(resp.read().decode("utf-8"))
        except urllib.error.HTTPError as e:
            # Only throttling and server-side errors are worth retrying.
            if e.code in (429, 500, 502, 503, 504) and attempt < tries:
                print(f" ! HTTP {e.code} sur {path} -> retry dans {delay:.0f}s", file=sys.stderr)
                time.sleep(delay)
                delay *= 2
                continue
            raise
        except OSError as e:
            # OSError covers URLError, TimeoutError, socket.timeout (which is
            # not a TimeoutError before Python 3.10) and connection resets
            # raised while the body is being read. HTTPError is also an
            # OSError but is handled by the clause above.
            if attempt < tries:
                print(f" ! reseau ({e}) -> retry dans {delay:.0f}s", file=sys.stderr)
                time.sleep(delay)
                delay *= 2
                continue
            raise
    # Only reachable when the loop body never ran (tries < 1).
    raise RuntimeError("echec apres %d tentatives : %s" % (tries, path))
def choices_map(field):
    """Build a ``{value: label}`` lookup from a select field of the schema.

    Reads ``field["type"]["choices"]``, expected to be a list of dicts with
    "value" and "label" keys. A missing or malformed structure yields an empty
    dict. A malformed entry is skipped on its own, so it no longer discards
    the valid entries that follow it.
    """
    try:
        choices = field["type"]["choices"]
    except (KeyError, TypeError):
        return {}
    if not isinstance(choices, (list, tuple)):
        return {}
    out = {}
    for choice in choices:
        try:
            out[choice["value"]] = choice["label"]
        except (KeyError, TypeError):
            # Missing key, non-dict entry or unhashable value: skip this entry.
            continue
    return out
def first_id(val):
    """Return the id carried by a Mixgraine select value, or None when empty.

    Mixgraine sends either a bare id, ``[]`` (empty) or ``[{"id": ...}]``.
    A bare ``{"id": ...}`` dict is also accepted, and a dict without an "id"
    key yields None instead of raising KeyError. Empty strings count as empty.
    """
    if isinstance(val, list):
        val = val[0] if val else None
        if not isinstance(val, dict):
            # Empty list, or a list whose first item is not a dict.
            return None
    if isinstance(val, dict):
        val = val.get("id")
    return None if val in ("", None) else val
def parse_contact_name(name, civilites=None):
    """Split a contact label into ``(last_name, first_name)``.

    'M.ROBERT Florian' -> ('ROBERT', 'Florian').

    A leading civility is removed first. It is only recognised when it ends
    with a dot itself ("M.") or is followed by a dot, whitespace or the end of
    the string. A bare prefix test would strip the "M" of "MARTIN" or the
    "Mr" of "MROZEK" and corrupt the surname.

    Args:
        name: raw contact label; falsy values yield ``(None, None)``.
        civilites: civility prefixes to strip, tried in order, matched
            case-insensitively. Defaults to the module-level CIVILITES.

    Returns:
        ``(last, first)``: the first word is the last name, the remaining
        words the first name. ``first`` is None for a single word, and both
        are None when nothing is left.
    """
    if not name:
        return None, None
    if civilites is None:
        civilites = CIVILITES
    s = name.strip()
    upper = s.upper()
    for civ in civilites:
        if not upper.startswith(civ.upper()):
            continue
        rest = s[len(civ):]
        # Require a real boundary after the civility unless it carries its
        # own dot, so surnames that merely start with the same letters are
        # left untouched.
        if civ.endswith(".") or not rest or rest[0] == "." or rest[0].isspace():
            s = rest.strip(" .")
            break
    parts = s.split()
    if not parts:
        return None, None
    if len(parts) == 1:
        return parts[0], None  # a single word is taken as the last name
    return parts[0], " ".join(parts[1:])  # first word = last name, rest = first name
def clean_phone(p):
    """Clean a phone value so it fits Starseed's 20-character limit.

    Only the text before the first "/" or "(" is kept, which selects the first
    number of entries such as '... (direct) / ... (standard)'. When nothing
    precedes that separator the whole stripped value is used instead.

    Returns:
        ``(phone, flag)``. ``phone`` is None for empty or whitespace-only
        input. ``flag`` is a warning message when the value had to be cut to
        20 characters, else None.
    """
    if not p:
        return None, None
    raw = str(p).strip()
    if not raw:
        # Whitespace-only input is "no phone", not an empty-string phone.
        return None, None
    # NOTE(review): a value written "+33 (0)6 ..." is cut at the parenthesis
    # and reduced to "+33" without any flag; confirm whether the source data
    # contains that notation.
    candidate = re.split(r"[/(]", raw)[0].strip()
    cleaned = candidate or raw
    flag = None
    if len(cleaned) > 20:
        flag = f"tel tronque ({raw!r})"
        cleaned = cleaned[:20].strip()
    return cleaned, flag
# A valid postal code is exactly 4 or 5 digits.
POSTCODE_RE = re.compile(r"^\d{4,5}$")


def clean_postcode(p):
    """Validate a postal code.

    All whitespace is removed before validation, so a value that differs only
    by spacing ("44 000", " 44000 ") is kept instead of being discarded.

    Returns:
        ``(code, flag)``. ``code`` is the compacted 4-5 digit string, or None
        when the input is empty or invalid. ``flag`` is a warning message for
        invalid input, else None.
    """
    if not p:
        return None, None
    s = "".join(str(p).split())
    if POSTCODE_RE.match(s):
        return s, None
    return None, f"code postal invalide ({p!r})"
# --- Normalisation d'un tiers -------------------------------------------------
def normalize(record, warnings):
    """Normalise one raw Mixgraine record into the Starseed tiers format.

    Args:
        record: decoded response of the ``PUT {"__data": true}`` call. The
            keys read are "fields" (form schema, used to resolve select ids
            into labels), "__data" (current values) and "details" (its "geo"
            entry).
        warnings: list mutated in place; one human-readable message is
            appended per anomaly found (unmapped label, truncated phone,
            invalid postal code, unknown country, missing category).

    Returns:
        A dict with the tiers identity, accounting referentials, categories,
        contacts, addresses and bank accounts (see the ``return`` statement
        for the exact keys).
    """

    def as_dict(value):
        # Presumably empty objects can come back as [] like empty selects do
        # (TODO confirm); anything that is not a dict is treated as empty so
        # the chained .get() calls below cannot fail.
        return value if isinstance(value, dict) else {}

    fields = as_dict(record.get("fields"))
    d = as_dict(record.get("__data"))
    details = as_dict(record.get("details"))
    geo = as_dict(details.get("geo"))
    tid = d.get("id")
    name = d.get("name") or d.get("reference")

    # --- id -> label resolvers built from the schema of THIS record ---
    liab = choices_map(fields.get("liability", {}))
    pdelay = choices_map(fields.get("paymentDelay", {}))
    ptype = choices_map(fields.get("paymentType", {}))
    bank = choices_map(fields.get("accountingBank", {}))
    distrib = choices_map(fields.get("distributor", {}))
    courtier = choices_map(fields.get("courtier", {}))
    cats = choices_map(fields.get("categories", {}))
    addr_fields = as_dict(as_dict(as_dict(fields.get("addresses")).get("type")).get("fields"))
    country_map = choices_map(addr_fields.get("country", {}))
    addr_cats = choices_map(addr_fields.get("categories", {}))
    carrier_map = choices_map(addr_fields.get("carrierType", {}))

    # Labels of the sites (organisations), keyed by the address boolean field.
    site_keys = ("organization_1", "organization_2", "organization_3")
    site_labels = {k: as_dict(addr_fields.get(k)).get("label") for k in site_keys}

    def map_ref(table, label, what):
        # Translate a Mixgraine label into a Starseed code; warn when unknown.
        if label is None:
            return None
        code = table.get(label)
        if code is None:
            warnings.append(f"tiers {tid} ({name}): {what} non mappe : {label!r}")
        return code

    # --- accounting referentials ---
    tva = map_ref(TVA_MODE, liab.get(first_id(d.get("liability"))), "tvaMode")
    delay = map_ref(PAYMENT_DELAY, pdelay.get(first_id(d.get("paymentDelay"))), "paymentDelay")
    pay = map_ref(PAYMENT_TYPE, ptype.get(first_id(d.get("paymentType"))), "paymentType")
    bnk = map_ref(BANK, bank.get(first_id(d.get("accountingBank"))), "bank")

    # --- tiers categories ---
    categories = []
    for c in d.get("categories") or []:
        lbl = cats.get(c.get("id"))
        if lbl:
            categories.append(lbl)
    if not categories:
        categories = ["A QUALIFIER"]  # Starseed requires at least one category
        warnings.append(f"tiers {tid} ({name}): aucune categorie -> 'A QUALIFIER'")

    # --- contacts ---
    contacts = []
    contact_phones = set()  # digits-only form of every phone already attached
    for c in d.get("contacts") or []:
        last, first = parse_contact_name(c.get("name"))
        phone, f1 = clean_phone(c.get("phone"))
        mobile, f2 = clean_phone(c.get("mobile"))
        if f1:
            warnings.append(f"tiers {tid} ({name}): {f1}")
        if f2:
            warnings.append(f"tiers {tid} ({name}): {f2}")
        if not last and not first:
            last = "Standard"  # RG-1.05/2.04: at least a name is required
        for ph in (phone, mobile):
            if ph:
                contact_phones.add(re.sub(r"\D", "", ph))
        contacts.append({
            "mixgraineId": c.get("id"),
            "lastName": last,
            "firstName": first,
            "jobTitle": c.get("function"),
            "email": (c.get("email") or None),
            "phonePrimary": phone,
            "phoneSecondary": mobile,
        })

    # The phone stored on the tiers itself goes into the contact list (never
    # at the root), unless a contact already carries the same number.
    base_phone, fb = clean_phone(d.get("phone"))
    if fb:
        warnings.append(f"tiers {tid} ({name}): {fb}")
    if base_phone and re.sub(r"\D", "", base_phone) not in contact_phones:
        free_slot = next((c for c in contacts if not c["phoneSecondary"]), None)
        if free_slot is not None:
            # Fill the first contact that has no secondary phone yet.
            free_slot["phoneSecondary"] = base_phone
        else:
            # No contact at all, or every secondary slot is taken: add a
            # dedicated contact rather than overwriting an existing number.
            contacts.append({
                "mixgraineId": None, "lastName": "Standard", "firstName": None,
                "jobTitle": None, "email": None,
                "phonePrimary": base_phone, "phoneSecondary": None,
            })

    # --- billing e-mails (mails[] entries with invoice=true) ---
    billing_mails = [m["mail"] for m in (d.get("mails") or []) if m.get("invoice") and m.get("mail")]

    # --- addresses ---
    addresses = []
    for a in d.get("addresses") or []:
        pc, fp = clean_postcode(a.get("postcode"))
        if fp:
            warnings.append(f"tiers {tid} ({name}): {fp}")
        # Sites come from the organization_n booleans of the address.
        sites = [site_labels[k] for k in site_keys if a.get(k) and site_labels[k]]
        # Address categories, resolved to labels (unknown ids are dropped).
        acats = [addr_cats.get(c.get("id")) for c in (a.get("categories") or []) if addr_cats.get(c.get("id"))]
        # Supplier address type (Rendu/Depart) from carrierType. first_id()
        # handles the [] empty-select form, which is unhashable as a dict key.
        carrier = carrier_map.get(first_id(a.get("carrierType")))
        supplier_addr_type = {"Rendu": "RENDU", "Départ": "DEPART", "Depart": "DEPART"}.get(carrier)
        # Country defaults to France, but an id the schema does not know is
        # reported instead of being silently relabelled.
        country_id = first_id(a.get("country"))
        country = country_map.get(country_id)
        if country is None:
            if country_id is not None:
                warnings.append(f"tiers {tid} ({name}): pays non mappe : {country_id!r} -> 'France'")
            country = "France"
        # geo is keyed by address id; try the string form first, then the raw id.
        latlng = geo.get(str(a.get("id"))) or geo.get(a.get("id"))
        lat, lng = (latlng.split(",") + [None, None])[:2] if isinstance(latlng, str) else (None, None)
        addresses.append({
            "mixgraineId": a.get("id"),
            "street": a.get("street1"),
            "streetComplement": a.get("street2"),
            "postalCode": pc,
            "city": a.get("city"),
            "country": country,
            # customer flags
            "isBilling": bool(a.get("billing")),
            "isDelivery": bool(a.get("sales")),
            "isProspect": bool(a.get("salesTrip")),
            # supplier type
            "supplierAddressType": supplier_addr_type or "PROSPECT",
            "bennes": a.get("benneCount"),
            "sites": sites,
            "categories": acats,
            "billingEmail": (billing_mails[0] if (a.get("billing") and billing_mails) else None),
            "contactMixgraineIds": [c.get("id") for c in (a.get("contacts") or [])],
            "lat": lat, "lng": lng,  # kept for information (no Starseed target)
        })

    # --- bank accounts (banks[]); entries without an IBAN are skipped ---
    ribs = [{
        "label": b.get("label") or "Compte principal",
        "iban": b.get("iban"),
        "bic": b.get("bic"),
    } for b in (d.get("banks") or []) if b.get("iban")]

    return {
        "mixgraineId": tid,
        "companyName": name,
        "isCustomer": bool(d.get("customer")),
        "isSupplier": bool(d.get("supplier")),
        "accountNumber": d.get("billingAccount") or None,
        "nTva": d.get("vatNumber") or None,
        "tvaMode": tva,
        "paymentDelay": delay,
        "paymentType": pay,
        "bank": bnk,
        "distributorName": distrib.get(first_id(d.get("distributor"))),
        "brokerName": courtier.get(first_id(d.get("courtier"))),
        "categories": categories,
        "contacts": contacts,
        "addresses": addresses,
        "ribs": ribs,
    }
# --- Recuperation de la liste des id -----------------------------------------
def fetch_ids_for(filters, limit, delay):
    """Page through /api/customer/ for one filter set and return the ids.

    Args:
        filters: dict serialised to JSON into the ``filters`` query parameter;
            a falsy value omits the parameter.
        limit: page size sent to the API.
        delay: pause in seconds between two page requests.

    Returns:
        The ids in the order the API returned them (pages start at 0).
    """
    encoded_fields = urllib.parse.quote('["name"]')
    encoded_filters = ""
    if filters:
        encoded_filters = urllib.parse.quote(json.dumps(filters))

    collected = []
    total = None
    page_no = 0
    while True:
        query = f"/api/customer/?fields={encoded_fields}&limit={limit}&order=name&page={page_no}"
        if encoded_filters:
            query += f"&filters={encoded_filters}"
        payload = http("GET", query)
        if total is None:
            # Total row count as announced by the API.
            total = payload.get("count", 0)
        rows = payload.get("data", [])
        if not rows:
            break  # an empty page marks the end of the listing
        for row in rows:
            collected.append(row["id"])
        page_no += 1
        if total and len(collected) >= total:
            break  # everything announced has been collected
        time.sleep(delay)
    return collected
def fetch_all_ids(limit, delay):
    """Collect the ids of each group (customer / supplier / prestataire).

    Each tiers is classified by its MEMBERSHIP of the filtered lists, which is
    more reliable than the flags that are sometimes missing from the __data
    form.

    Args:
        limit: page size forwarded to fetch_ids_for.
        delay: pause in seconds between page requests, forwarded as well.

    Returns:
        ``(all_ids, customer_ids, supplier_ids, prestataire_ids)``: the sorted
        union as a list, followed by the three id sets.
    """
    print(" - clients (customer=true)")
    customer_ids = set(fetch_ids_for({"customer": True}, limit, delay))
    print(f" {len(customer_ids)}")
    print(" - fournisseurs (supplier=true)")
    supplier_ids = set(fetch_ids_for({"supplier": True}, limit, delay))
    print(f" {len(supplier_ids)}")
    # The previous message said prestataires were filed under suppliers, but
    # they are exported as their own Provider entity (providers.json).
    print(" - prestataires (prestataire=true) -> entite Provider dediee")
    prestataire_ids = set(fetch_ids_for({"prestataire": True}, limit, delay))
    print(f" {len(prestataire_ids)}")
    all_ids = sorted(customer_ids | supplier_ids | prestataire_ids)
    print(f" total tiers distincts : {len(all_ids)}")
    return all_ids, customer_ids, supplier_ids, prestataire_ids
# --- Main ---------------------------------------------------------------------
def main():
    """Run the extraction: list ids, fetch records, normalise, write output.

    Steps:
      1. Collect the ids of every group through fetch_all_ids.
      2. Fetch each detailed record, using one cache file per id so an
         interrupted run can resume without refetching.
      3. Normalise every record and dispatch it to clients / suppliers /
         providers.
      4. Write the JSON files and the text report into ``--out``.

    Cache files are written to a temporary name and renamed into place, so an
    interruption cannot leave a truncated file behind. A cache file that
    cannot be parsed is fetched again instead of aborting the run.
    """
    ap = argparse.ArgumentParser(description="Extraction Mixgraine -> format Starseed")
    ap.add_argument("--out", default="mixgraine-export", help="dossier de sortie")
    ap.add_argument("--delay", type=float, default=1.0, help="pause (s) entre chaque fiche (defaut 1 req/s)")
    ap.add_argument("--limit", type=int, default=200, help="taille de page pour la liste")
    ap.add_argument("--limit-ids", type=int, default=0, help="ne traiter que N tiers (test)")
    args = ap.parse_args()
    if not JWT:
        sys.exit("ERREUR : export MIXGRAINE_JWT='<ton token>' avant de lancer.")
    cache_dir = os.path.join(args.out, "cache")
    os.makedirs(cache_dir, exist_ok=True)

    print("== Etape 1 : liste des id (par groupe) ==")
    ids, customer_ids, supplier_ids, prestataire_ids = fetch_all_ids(args.limit, args.delay)
    if args.limit_ids:
        ids = ids[:args.limit_ids]
    print(f"{len(ids)} tiers a recuperer.\n")

    print("== Etape 2 : fiches detaillees (lent, soyez patient) ==")
    raw_by_id = {}
    for n, tid in enumerate(ids, 1):
        cache_file = os.path.join(cache_dir, f"{tid}.json")
        rec = None
        cached = False
        if os.path.exists(cache_file):
            try:
                with open(cache_file, encoding="utf-8") as f:
                    rec = json.load(f)
                cached = True
            except ValueError:
                # JSONDecodeError and UnicodeDecodeError are ValueErrors: the
                # file is corrupt (e.g. left by an older interrupted run).
                print(f" ! cache illisible pour {tid} -> nouvelle requete", file=sys.stderr)
        if not cached:
            rec = http("PUT", f"/api/customer/{tid}", body={"__data": True})
            # Write to a temporary file, then rename into place.
            tmp_file = cache_file + ".tmp"
            with open(tmp_file, "w", encoding="utf-8") as f:
                json.dump(rec, f, ensure_ascii=False)
            os.replace(tmp_file, cache_file)
        raw_by_id[tid] = rec
        # Progress is reported for cached records too, so a resumed run does
        # not stay silent while it replays the cache.
        if n % 25 == 0 or n == len(ids):
            print(f" {n}/{len(ids)} fiches")
        if not cached:
            time.sleep(args.delay)  # throttle only when the server was hit

    print("\n== Etape 3 : normalisation ==")
    warnings = []
    clients, suppliers, providers = [], [], []
    cat_set, site_set = set(), set()
    for tid in ids:
        norm = normalize(raw_by_id[tid], warnings)
        # Classification by MEMBERSHIP of the filtered lists (reliable source):
        #   customer    -> Client   (Commercial module)
        #   supplier    -> Supplier (Commercial module)
        #   prestataire -> Provider (Technical module), a dedicated entity,
        #                  NOT a Supplier
        is_customer = tid in customer_ids or norm["isCustomer"]
        is_supplier = tid in supplier_ids or norm["isSupplier"]
        is_prestataire = tid in prestataire_ids
        norm["isCustomer"] = is_customer
        norm["isSupplier"] = is_supplier
        norm["isPrestataire"] = is_prestataire
        # A Provider carries its sites DIRECTLY (RG-3.03): aggregate the
        # sites of all its addresses.
        norm["sites"] = sorted({s for a in norm["addresses"] for s in a["sites"]})
        cat_set.update(norm["categories"])
        for a in norm["addresses"]:
            site_set.update(a["sites"])
            cat_set.update(a["categories"])
        # A tiers can hold several roles -> it is written to each matching table.
        if is_customer:
            clients.append(norm)
        if is_supplier:
            suppliers.append(norm)
        if is_prestataire:
            providers.append(norm)
        if not (is_customer or is_supplier or is_prestataire):
            clients.append(norm)  # safety net: customer by default
            warnings.append(f"tiers {tid} ({norm['companyName']}): aucun flag -> client par defaut")

    referentials = {
        "categories": sorted(cat_set),
        "sites": sorted(site_set),
        "tvaModes": sorted(set(TVA_MODE.values())),
        "paymentDelays": sorted(set(PAYMENT_DELAY.values())),
        "paymentTypes": sorted(set(PAYMENT_TYPE.values())),
        "banks": sorted(set(BANK.values())),
    }

    def dump(fname, obj):
        # Write one pretty-printed UTF-8 JSON file into the output directory.
        with open(os.path.join(args.out, fname), "w", encoding="utf-8") as f:
            json.dump(obj, f, ensure_ascii=False, indent=2)

    dump("clients.json", clients)
    dump("suppliers.json", suppliers)
    dump("providers.json", providers)
    dump("referentials.json", referentials)
    with open(os.path.join(args.out, "extraction-report.txt"), "w", encoding="utf-8") as f:
        f.write(f"Tiers traites : {len(ids)}\n")
        f.write(f"Clients : {len(clients)}\n")
        f.write(f"Fournisseurs : {len(suppliers)}\n")
        f.write(f"Prestataires : {len(providers)}\n")
        f.write(f"Categories uniques : {len(referentials['categories'])}\n")
        f.write(f"Sites uniques : {referentials['sites']}\n")
        f.write(f"Avertissements : {len(warnings)}\n\n")
        f.write("\n".join(warnings))

    print("\nTermine.")
    print(f" clients.json : {len(clients)}")
    print(f" suppliers.json : {len(suppliers)}")
    print(f" providers.json : {len(providers)} (prestataires)")
    print(f" referentials.json : {len(referentials['categories'])} categories, sites {referentials['sites']}")
    print(f" avertissements : {len(warnings)} (voir extraction-report.txt)")
    print(f"Sortie dans : {os.path.abspath(args.out)}")


if __name__ == "__main__":
    main()