#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Extract and normalise third parties (customers / suppliers) from the Mixgraine
CRM (https://liot.mixsuite.fr) into the format of the Starseed Client /
Supplier entities.

How it works:
  1. Page through GET /api/customer/?...&page=N to collect every id.
  2. For each id, fetch the COMPLETE record via
         PUT /api/customer/{id}   body {"__data": true}
     (this is the call the front end makes to PRELOAD the edit form: it
     MODIFIES NOTHING, it returns the schema + the current values).
  3. Resolve the selects (paymentType, bank, country, distributor, sites...)
     through the returned schema, then normalise each third party to the
     Starseed format.
  4. Write clients.json, suppliers.json, providers.json, referentials.json
     + a report.

Characteristics:
  - Zero dependencies (stdlib only).
  - Per-id disk cache (resume after interruption, no refetch).
  - Deliberately slow rate (--delay) so the server is not saturated.
  - Automatic backoff on network error / 429 / 5xx.

Usage:
    export MIXGRAINE_JWT="eyJ0eXAi..."            # your Bearer token (DO NOT commit)
    python3 extract_mixgraine.py                  # full extraction
    python3 extract_mixgraine.py --delay 2.0      # even gentler (default is 1.0)
    python3 extract_mixgraine.py --limit-ids 20   # quick test on 20 third parties

The JWT is a session secret: pass it through an environment variable, never
hard-code it here.
"""

import argparse
import json
import os
import re
import sys
import time
import urllib.error
import urllib.parse
import urllib.request

# A trailing slash would produce "//api/..." because every path starts with "/".
BASE = os.environ.get("MIXGRAINE_BASE", "https://liot.mixsuite.fr").rstrip("/")
# Stray whitespace/newlines in the token would make urllib reject the header.
JWT = (os.environ.get("MIXGRAINE_JWT") or os.environ.get("LAUTTREE_JWT", "")).strip()

# --- Mapping tables: Mixgraine labels -> Starseed referential codes -----------

TVA_MODE = {
    "France (ventes)": "FRANCE_VENTES",
    "Export (ventes)": "EXPORT_VENTES",
    "Intracom (ventes)": "INTRACOM_VENTES",
    "France (achats)": "FRANCE_VENTES",  # no "achats" mode in the seed -> to be decided
}
PAYMENT_DELAY = {
    "15 jours": "J15",
    "20 jours": "J20",  # missing from the Starseed seed -> to be created
    "30 jours": "J30",
    "A reception": "A_RECEPTION",
    "A réception": "A_RECEPTION",
}
PAYMENT_TYPE = {
    "LCR non soumise": "NON_SOUMISE",  # not LCR: a RIB is not always available (RG-1.13)
    "Virement": "VIREMENT",
    "Cheque": "CHEQUE",
    "Chèque": "CHEQUE",
}
BANK = {
    "CIC": "CIC",
    "SOCIETE GENERALE": "SG",
    "CREDIT AGRICOLE": "CA",
}

CIVILITES = ("Mme", "Mlle", "Mle", "M.", "Mr", "M")  # order: longest first

# A civility only counts when it is followed by a dot, whitespace or the end of
# the string; a bare prefix test would turn "MARTIN Paul" into "ARTIN Paul".
_CIVILITY_RE = re.compile(
    r"(?:%s)(?:\.|\s+|$)"
    % "|".join(
        re.escape(token)
        for token in sorted(
            {c.rstrip(".") for c in CIVILITES}, key=lambda t: (-len(t), t)
        )
    ),
    re.IGNORECASE,
)

# Boolean address keys that flag membership of a site (organisation).
_SITE_KEYS = ("organization_1", "organization_2", "organization_3")

# HTTP statuses worth retrying (rate limiting and transient server errors).
_RETRYABLE_STATUS = (429, 500, 502, 503, 504)


# --- Small utility functions --------------------------------------------------

def http(method, path, body=None, tries=5):
    """Perform an HTTP call with retry/backoff and return the decoded JSON.

    Args:
        method: HTTP verb ("GET", "PUT", ...).
        path: path appended to BASE (must start with "/").
        body: optional object sent as a JSON body.
        tries: maximum number of attempts.

    Returns:
        The response body parsed as JSON.

    Raises:
        urllib.error.HTTPError: non-retryable status, or retries exhausted.
        urllib.error.URLError / TimeoutError / ConnectionError: network
            failure on the last attempt.
        RuntimeError: no attempt was made (tries < 1).
    """
    url = BASE + path
    data = json.dumps(body).encode("utf-8") if body is not None else None
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Authorization": "Bearer " + JWT,
    }
    if data is not None:
        headers["Content-Type"] = "application/json"

    delay = 2.0  # doubled after every failed attempt
    for attempt in range(1, tries + 1):
        req = urllib.request.Request(url, data=data, headers=headers, method=method)
        try:
            with urllib.request.urlopen(req, timeout=60) as resp:
                return json.loads(resp.read().decode("utf-8"))
        except urllib.error.HTTPError as e:
            # HTTPError is a URLError subclass, so it must be handled first.
            if e.code in _RETRYABLE_STATUS and attempt < tries:
                print(f" ! HTTP {e.code} sur {path} -> retry dans {delay:.0f}s",
                      file=sys.stderr)
                time.sleep(delay)
                delay *= 2
                continue
            raise
        except (urllib.error.URLError, TimeoutError, ConnectionError) as e:
            # ConnectionError covers resets raised while reading the body,
            # which urllib does not wrap in URLError.
            if attempt < tries:
                print(f" ! reseau ({e}) -> retry dans {delay:.0f}s", file=sys.stderr)
                time.sleep(delay)
                delay *= 2
                continue
            raise
    raise RuntimeError("echec apres %d tentatives : %s" % (tries, path))


def choices_map(field):
    """Build {value: label} from a select field of the schema.

    Best effort: a missing or malformed field yields whatever was collected
    before the problem (possibly an empty dict) instead of raising.
    """
    out = {}
    try:
        for c in field["type"]["choices"]:
            out[c["value"]] = c["label"]
    except (KeyError, TypeError):
        pass
    return out


def first_id(val):
    """Reduce a select value to a plain id (or None).

    Mixgraine returns either an id, or [] (empty), or [{id: ..}]. A bare
    {id: ..} dict is also accepted so the result is always usable as a dict key.
    """
    if isinstance(val, list):
        if val and isinstance(val[0], dict):
            return val[0].get("id")
        return None
    if isinstance(val, dict):
        return val.get("id")
    return val if val not in ("", None) else None


def parse_contact_name(name):
    """Split a contact name: 'M.ROBERT Florian' -> ('ROBERT', 'Florian').

    Returns:
        (lastName, firstName); (None, None) when nothing usable remains.
    """
    if not name:
        return None, None
    s = name.strip()
    civility = _CIVILITY_RE.match(s)
    if civility:
        s = s[civility.end():].strip(" .")
    parts = s.split()
    if not parts:
        return None, None
    if len(parts) == 1:
        return parts[0], None  # a single word -> family name
    return parts[0], " ".join(parts[1:])  # first = family name, rest = first name


def clean_phone(p):
    """Clean/truncate a phone number to fit in 20 characters (Starseed limit).

    Returns:
        (number, flag) where flag is a warning text when truncation happened,
        else None. (None, None) for an empty or blank value.
    """
    if not p:
        return None, None
    raw = str(p).strip()
    if not raw:
        return None, None
    # Keep the first number when several are given
    # ('... (direct) / ... (standard)').
    candidate = re.split(r"[/(]", raw)[0].strip()
    # A value starting with "(" or "/" leaves nothing: fall back to the raw text.
    cleaned = candidate if candidate else raw
    flag = None
    if len(cleaned) > 20:
        flag = f"tel tronque ({raw!r})"
        cleaned = cleaned[:20].strip()
    return cleaned, flag


POSTCODE_RE = re.compile(r"^\d{4,5}$")


def clean_postcode(p):
    """Validate a postcode (4 or 5 digits).

    Returns:
        (postcode, None) when valid, (None, warning) when invalid,
        (None, None) when empty.
    """
    if not p:
        return None, None
    s = str(p).strip()
    if POSTCODE_RE.match(s):
        return s, None
    return None, f"code postal invalide ({p!r})"


# --- Normalisation of one third party -----------------------------------------

def _attach_base_phone(contacts, base_phone):
    """Store the third party's own phone number inside the contact list.

    It fills the first free secondary-phone slot; when there is none (no
    contact, or every slot taken) a dedicated "Standard" contact is appended
    so that no existing number is overwritten.
    """
    for contact in contacts:
        if not contact["phoneSecondary"]:
            contact["phoneSecondary"] = base_phone
            return
    contacts.append({
        "mixgraineId": None,
        "lastName": "Standard",
        "firstName": None,
        "jobTitle": None,
        "email": None,
        "phonePrimary": base_phone,
        "phoneSecondary": None,
    })


def normalize(record, warnings):
    """Normalise one record to the Starseed format.

    Args:
        record: response of PUT {__data: true} (schema under "fields", values
            under "__data", extras under "details").
        warnings: list that receives human-readable warning strings (mutated).

    Returns:
        A dict describing the third party (identity, accounting referentials,
        categories, contacts, addresses, RIBs).
    """
    fields = record.get("fields") or {}
    d = record.get("__data") or {}
    details = record.get("details") or {}
    geo = details.get("geo") or {}
    tid = d.get("id")
    name = d.get("name") or d.get("reference")
    who = f"tiers {tid} ({name})"

    # --- resolvers built from the schema of THIS record ---
    liab = choices_map(fields.get("liability", {}))
    pdelay = choices_map(fields.get("paymentDelay", {}))
    ptype = choices_map(fields.get("paymentType", {}))
    bank = choices_map(fields.get("accountingBank", {}))
    distrib = choices_map(fields.get("distributor", {}))
    courtier = choices_map(fields.get("courtier", {}))
    cats = choices_map(fields.get("categories", {}))
    addr_schema = (fields.get("addresses") or {}).get("type") or {}
    addr_fields = addr_schema.get("fields") or {}
    country_map = choices_map(addr_fields.get("country", {}))
    addr_cats = choices_map(addr_fields.get("categories", {}))
    carrier_map = choices_map(addr_fields.get("carrierType", {}))
    # labels of the sites (organisations)
    site_labels = {
        key: (addr_fields.get(key) or {}).get("label") for key in _SITE_KEYS
    }

    def map_ref(table, label, what):
        """Translate a Mixgraine label to a Starseed code, warning on a miss."""
        if label is None:
            return None
        code = table.get(label)
        if code is None:
            warnings.append(f"{who}: {what} non mappe : {label!r}")
        return code

    # --- accounting referentials ---
    tva = map_ref(TVA_MODE, liab.get(first_id(d.get("liability"))), "tvaMode")
    delay = map_ref(PAYMENT_DELAY, pdelay.get(first_id(d.get("paymentDelay"))),
                    "paymentDelay")
    pay = map_ref(PAYMENT_TYPE, ptype.get(first_id(d.get("paymentType"))),
                  "paymentType")
    bnk = map_ref(BANK, bank.get(first_id(d.get("accountingBank"))), "bank")

    # --- third-party categories ---
    resolved = (cats.get(c.get("id")) for c in d.get("categories") or [])
    categories = [lbl for lbl in resolved if lbl]
    if not categories:
        categories = ["A QUALIFIER"]  # Starseed requires at least one
        warnings.append(f"{who}: aucune categorie -> 'A QUALIFIER'")

    # --- contacts ---
    contacts = []
    contact_phones = set()  # digits-only numbers already carried by a contact
    for c in d.get("contacts") or []:
        last, first = parse_contact_name(c.get("name"))
        phone, f1 = clean_phone(c.get("phone"))
        mobile, f2 = clean_phone(c.get("mobile"))
        if f1:
            warnings.append(f"{who}: {f1}")
        if f2:
            warnings.append(f"{who}: {f2}")
        if not last and not first:
            last = "Standard"  # RG-1.05/2.04: at least a name
        for ph in (phone, mobile):
            if ph:
                contact_phones.add(re.sub(r"\D", "", ph))
        contacts.append({
            "mixgraineId": c.get("id"),
            "lastName": last,
            "firstName": first,
            "jobTitle": c.get("function"),
            "email": (c.get("email") or None),
            "phonePrimary": phone,
            "phoneSecondary": mobile,
        })

    # Phone carried by the base object -> goes into the contact list (never at
    # the root), unless a contact already has the same number.
    base_phone, fb = clean_phone(d.get("phone"))
    if fb:
        warnings.append(f"{who}: {fb}")
    if base_phone and re.sub(r"\D", "", base_phone) not in contact_phones:
        _attach_base_phone(contacts, base_phone)

    # --- billing e-mails (mails[] with invoice=true) ---
    billing_mails = [
        m["mail"] for m in (d.get("mails") or [])
        if m.get("invoice") and m.get("mail")
    ]

    # --- addresses ---
    addresses = []
    for a in d.get("addresses") or []:
        pc, fp = clean_postcode(a.get("postcode"))
        if fp:
            warnings.append(f"{who}: {fp}")
        # sites from the organization_n booleans
        sites = [site_labels[k] for k in _SITE_KEYS if a.get(k) and site_labels[k]]
        # address categories
        acat_labels = (addr_cats.get(c.get("id")) for c in (a.get("categories") or []))
        acats = [lbl for lbl in acat_labels if lbl]
        # supplier address type (Rendu/Depart) from carrierType; first_id()
        # because an empty select comes back as [] (unhashable as a dict key)
        carrier = carrier_map.get(first_id(a.get("carrierType")))
        supplier_addr_type = {
            "Rendu": "RENDU",
            "Départ": "DEPART",
            "Depart": "DEPART",
        }.get(carrier)
        # geo is looked up by the address id, as a string key first
        latlng = geo.get(str(a.get("id"))) or geo.get(a.get("id"))
        if isinstance(latlng, str):
            # padded so a value without a comma still unpacks
            lat, lng = (latlng.split(",") + [None, None])[:2]
        else:
            lat, lng = None, None
        addresses.append({
            "mixgraineId": a.get("id"),
            "street": a.get("street1"),
            "streetComplement": a.get("street2"),
            "postalCode": pc,
            "city": a.get("city"),
            # unknown or empty country falls back to "France"
            "country": country_map.get(first_id(a.get("country")), "France"),
            # customer flags
            "isBilling": bool(a.get("billing")),
            "isDelivery": bool(a.get("sales")),
            "isProspect": bool(a.get("salesTrip")),
            # supplier type
            "supplierAddressType": supplier_addr_type or "PROSPECT",
            "bennes": a.get("benneCount"),
            "sites": sites,
            "categories": acats,
            "billingEmail": (billing_mails[0]
                             if (a.get("billing") and billing_mails) else None),
            "contactMixgraineIds": [c.get("id") for c in (a.get("contacts") or [])],
            "lat": lat,
            "lng": lng,  # kept for information (no Starseed target)
        })

    # --- RIB (banks[]) ---
    ribs = [{
        "label": b.get("label") or "Compte principal",
        "iban": b.get("iban"),
        "bic": b.get("bic"),
    } for b in (d.get("banks") or []) if b.get("iban")]

    return {
        "mixgraineId": tid,
        "companyName": name,
        "isCustomer": bool(d.get("customer")),
        "isSupplier": bool(d.get("supplier")),
        "accountNumber": d.get("billingAccount") or None,
        "nTva": d.get("vatNumber") or None,
        "tvaMode": tva,
        "paymentDelay": delay,
        "paymentType": pay,
        "bank": bnk,
        "distributorName": distrib.get(first_id(d.get("distributor"))),
        "brokerName": courtier.get(first_id(d.get("courtier"))),
        "categories": categories,
        "contacts": contacts,
        "addresses": addresses,
        "ribs": ribs,
    }


# --- Retrieval of the id list --------------------------------------------------

def fetch_ids_for(filters, limit, delay):
    """Page through /api/customer/ for one filter set and return the ids.

    Args:
        filters: dict sent JSON-encoded in the "filters" query parameter
            (omitted when empty).
        limit: page size.
        delay: pause in seconds between two pages.

    Returns:
        The list of ids, in the order returned by the server.
    """
    fields = urllib.parse.quote('["name"]')
    fstr = urllib.parse.quote(json.dumps(filters)) if filters else ""
    ids, page, count = [], 0, None
    while True:
        path = f"/api/customer/?fields={fields}&limit={limit}&order=name&page={page}"
        if fstr:
            path += f"&filters={fstr}"
        resp = http("GET", path)
        if count is None:
            # total taken from the first page only
            count = resp.get("count", 0)
        batch = resp.get("data", [])
        if not batch:
            break
        ids.extend(r["id"] for r in batch)
        page += 1
        if count and len(ids) >= count:
            break  # everything collected: skip the extra empty-page request
        time.sleep(delay)
    return ids


def fetch_all_ids(limit, delay):
    """Collect the ids per group (customer / supplier / prestataire) + union.

    Membership of the filtered lists is used to classify each third party,
    which is more reliable than the flags that are sometimes missing from the
    __data form.

    Returns:
        (all_ids, customer_ids, supplier_ids, prestataire_ids) where all_ids
        is the sorted union and the other three are sets.
    """
    print(" - clients (customer=true)")
    customer_ids = set(fetch_ids_for({"customer": True}, limit, delay))
    print(f" {len(customer_ids)}")
    print(" - fournisseurs (supplier=true)")
    supplier_ids = set(fetch_ids_for({"supplier": True}, limit, delay))
    print(f" {len(supplier_ids)}")
    # NOTE: the label below says "ranges en fournisseurs", but main() files
    # prestataires under providers.json; the message is kept as is.
    print(" - prestataires (prestataire=true) -> ranges en fournisseurs")
    prestataire_ids = set(fetch_ids_for({"prestataire": True}, limit, delay))
    print(f" {len(prestataire_ids)}")
    all_ids = sorted(customer_ids | supplier_ids | prestataire_ids)
    print(f" total tiers distincts : {len(all_ids)}")
    return all_ids, customer_ids, supplier_ids, prestataire_ids


# --- Disk cache ----------------------------------------------------------------

def _load_cached(cache_file):
    """Return the cached record, or None when absent or unreadable."""
    try:
        with open(cache_file, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return None
    except (OSError, ValueError) as e:
        # Truncated/corrupt file (e.g. interrupted write): refetch instead of
        # aborting the whole resume.
        print(f" ! cache illisible ({cache_file}: {e}) -> refetch", file=sys.stderr)
        return None


def _store_cached(cache_file, rec):
    """Write a record to the cache atomically (temp file + rename)."""
    tmp_file = cache_file + ".tmp"
    with open(tmp_file, "w", encoding="utf-8") as f:
        json.dump(rec, f, ensure_ascii=False)
    os.replace(tmp_file, cache_file)


# --- Main ----------------------------------------------------------------------

def main():
    """Run the extraction: list ids, fetch records, normalise, write outputs."""
    ap = argparse.ArgumentParser(description="Extraction Mixgraine -> format Starseed")
    ap.add_argument("--out", default="mixgraine-export", help="dossier de sortie")
    ap.add_argument("--delay", type=float, default=1.0,
                    help="pause (s) entre chaque fiche (defaut 1 req/s)")
    ap.add_argument("--limit", type=int, default=200,
                    help="taille de page pour la liste")
    ap.add_argument("--limit-ids", type=int, default=0,
                    help="ne traiter que N tiers (test)")
    args = ap.parse_args()

    if not JWT:
        sys.exit("ERREUR : export MIXGRAINE_JWT='' avant de lancer.")

    cache_dir = os.path.join(args.out, "cache")
    os.makedirs(cache_dir, exist_ok=True)

    print("== Etape 1 : liste des id (par groupe) ==")
    ids, customer_ids, supplier_ids, prestataire_ids = fetch_all_ids(
        args.limit, args.delay)
    if args.limit_ids:
        ids = ids[:args.limit_ids]
    print(f"{len(ids)} tiers a recuperer.\n")

    print("== Etape 2 : fiches detaillees (lent, soyez patient) ==")
    raw_by_id = {}
    for n, tid in enumerate(ids, 1):
        cache_file = os.path.join(cache_dir, f"{tid}.json")
        rec = _load_cached(cache_file)
        fetched = rec is None
        if fetched:
            rec = http("PUT", f"/api/customer/{tid}", body={"__data": True})
            _store_cached(cache_file, rec)
        raw_by_id[tid] = rec
        if n % 25 == 0 or n == len(ids):
            print(f" {n}/{len(ids)} fiches")
        if fetched:
            time.sleep(args.delay)  # throttle only after a real request

    print("\n== Etape 3 : normalisation ==")
    warnings = []
    clients, suppliers, providers = [], [], []
    cat_set, site_set = set(), set()
    for tid in ids:
        norm = normalize(raw_by_id[tid], warnings)
        # Classification by MEMBERSHIP of the filtered lists (reliable source):
        #   customer    -> Client   (Commercial module)
        #   supplier    -> Supplier (Commercial module)
        #   prestataire -> Provider (Technique module) - dedicated entity,
        #                  NOT a Supplier
        is_customer = tid in customer_ids or norm["isCustomer"]
        is_supplier = tid in supplier_ids or norm["isSupplier"]
        is_prestataire = tid in prestataire_ids
        norm["isCustomer"] = is_customer
        norm["isSupplier"] = is_supplier
        norm["isPrestataire"] = is_prestataire
        # Provider carries the sites DIRECTLY (RG-3.03): aggregate the sites
        # of the addresses.
        norm["sites"] = sorted({s for a in norm["addresses"] for s in a["sites"]})

        cat_set.update(norm["categories"])
        for a in norm["addresses"]:
            site_set.update(a["sites"])
            cat_set.update(a["categories"])

        # A third party can hold several roles -> created in every table
        # concerned.
        if is_customer:
            clients.append(norm)
        if is_supplier:
            suppliers.append(norm)
        if is_prestataire:
            providers.append(norm)
        if not (is_customer or is_supplier or is_prestataire):
            clients.append(norm)  # safety net: customer by default
            warnings.append(
                f"tiers {tid} ({norm['companyName']}): aucun flag -> client par defaut")

    referentials = {
        "categories": sorted(cat_set),
        "sites": sorted(site_set),
        "tvaModes": sorted(set(TVA_MODE.values())),
        "paymentDelays": sorted(set(PAYMENT_DELAY.values())),
        "paymentTypes": sorted(set(PAYMENT_TYPE.values())),
        "banks": sorted(set(BANK.values())),
    }

    def dump(fname, obj):
        """Write obj as indented UTF-8 JSON into the output folder."""
        with open(os.path.join(args.out, fname), "w", encoding="utf-8") as f:
            json.dump(obj, f, ensure_ascii=False, indent=2)

    dump("clients.json", clients)
    dump("suppliers.json", suppliers)
    dump("providers.json", providers)
    dump("referentials.json", referentials)

    report_path = os.path.join(args.out, "extraction-report.txt")
    with open(report_path, "w", encoding="utf-8") as f:
        f.write(f"Tiers traites : {len(ids)}\n")
        f.write(f"Clients : {len(clients)}\n")
        f.write(f"Fournisseurs : {len(suppliers)}\n")
        f.write(f"Prestataires : {len(providers)}\n")
        f.write(f"Categories uniques : {len(referentials['categories'])}\n")
        f.write(f"Sites uniques : {referentials['sites']}\n")
        f.write(f"Avertissements : {len(warnings)}\n\n")
        f.write("\n".join(warnings))

    print("\nTermine.")
    print(f" clients.json : {len(clients)}")
    print(f" suppliers.json : {len(suppliers)}")
    print(f" providers.json : {len(providers)} (prestataires)")
    print(f" referentials.json : {len(referentials['categories'])} categories, "
          f"sites {referentials['sites']}")
    print(f" avertissements : {len(warnings)} (voir extraction-report.txt)")
    print(f"Sortie dans : {os.path.abspath(args.out)}")


if __name__ == "__main__":
    main()