scadenza, caching e log

This commit is contained in:
adish-rmr 2026-02-26 16:44:45 +01:00
parent da5e332efa
commit bba5c11bc9
4 changed files with 75 additions and 333 deletions

View file

@ -1,246 +0,0 @@
"""
Script per creare un ordine mock con 4 ingredienti per testare la UI.
Inserisce direttamente nei database senza passare dalla pipeline (no scraping).
Uso: uv run python scripts/create_mock_order.py
"""
import sys
import os
# Aggiungi il path del progetto
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
from pif_compiler.functions.db_utils import (
db_connect, upsert_cliente, insert_ordine, aggiorna_stato_ordine,
update_ordine_cliente, upsert_ingrediente
)
from pif_compiler.classes.models import (
StatoOrdine, Ingredient, DapInfo, CosingInfo, ToxIndicator, Toxicity, Esposition
)
from pif_compiler.classes.main_workflow import Project, ProjectIngredient
def ensure_preset_exists(preset_name="Test Preset"):
    """Return the exposure preset with the given name, creating it if missing.

    Looks the preset up by name first; only when no record exists does it
    build a new Esposition with fixed test values and persist it to Postgres.
    Exits the process with status 1 if the save fails.
    """
    existing = Esposition.get_by_name(preset_name)
    if existing:
        print(f"Preset '{preset_name}' già esistente")
        return existing

    print(f"Creazione preset '{preset_name}'...")
    # Fixed, realistic-looking exposure parameters for a body cream.
    new_preset = Esposition(
        preset_name=preset_name,
        tipo_prodotto="Crema corpo",
        luogo_applicazione="Corpo",
        esp_normali=["Dermal"],
        esp_secondarie=["Oral"],
        esp_nano=[],
        sup_esposta=15670,
        freq_applicazione=1,
        qta_giornaliera=7.82,
        ritenzione=1.0
    )
    new_id = new_preset.save_to_postgres()
    if not new_id:
        # Without a preset the rest of the mock pipeline cannot run.
        print("ERRORE: impossibile creare il preset")
        sys.exit(1)
    print(f"Preset creato con id_preset={new_id}")
    return new_preset
def _mock_ingredient(cas, inci_names, dap_values, common_names, functions, indicators):
    """Assemble one mock Ingredient with fake DAP, COSING and toxicity data.

    Args:
        cas: CAS number, reused for the DAP, COSING and toxicity sub-records.
        inci_names: list of INCI names (also used as the COSING inci field).
        dap_values: dict of DapInfo numeric fields (molecular_weight, log_pow,
            tpsa, melting_point).
        common_names: COSING common names list.
        functions: COSING functionName list.
        indicators: list of ToxIndicator instances for the Toxicity record.
    """
    return Ingredient(
        cas=cas,
        inci=inci_names,
        dap_info=DapInfo(cas=cas, **dap_values),
        cosing_info=[CosingInfo(
            cas=[cas],
            common_names=common_names,
            inci=inci_names,
            annex=[],
            functionName=functions,
            otherRestrictions=[],
            cosmeticRestriction=None
        )],
        toxicity=Toxicity(cas=cas, indicators=indicators)
    )


def create_mock_ingredients():
    """Create three mock ingredients with fake toxicology and DAP data.

    Each ingredient is saved to MongoDB + PostgreSQL via Ingredient.save().

    Returns:
        Tuple (glycerin, cetyl, tocopherol) of saved Ingredient instances.
    """
    # GLYCERIN (56-81-5) — has a NOAEL plus an acute LD50
    glycerin = _mock_ingredient(
        "56-81-5", ["GLYCERIN"],
        dict(molecular_weight=92.09, log_pow=-1.76, tpsa=60.69, melting_point=18.0),
        ["Glycerol"],
        ["Humectant", "Solvent", "Skin conditioning"],
        [
            ToxIndicator(
                indicator="NOAEL", value=1000, unit="mg/kg bw/day",
                route="oral", toxicity_type="repeated_dose_toxicity",
                ref="https://chem.echa.europa.eu/100.003.264"
            ),
            ToxIndicator(
                indicator="LD50", value=12600, unit="mg/kg bw",
                route="oral", toxicity_type="acute_toxicity",
                ref="https://chem.echa.europa.eu/100.003.264"
            ),
        ],
    )
    # CETYL ALCOHOL (36653-82-4) — has a NOAEL only
    cetyl = _mock_ingredient(
        "36653-82-4", ["CETYL ALCOHOL"],
        dict(molecular_weight=242.44, log_pow=6.83, tpsa=20.23, melting_point=49.0),
        ["Cetyl alcohol", "1-Hexadecanol"],
        ["Emollient", "Emulsifying", "Opacifying"],
        [
            ToxIndicator(
                indicator="NOAEL", value=1000, unit="mg/kg bw/day",
                route="oral", toxicity_type="repeated_dose_toxicity",
                ref="https://chem.echa.europa.eu/100.004.098"
            ),
        ],
    )
    # TOCOPHEROL (59-02-9) — has a LOAEL (exercises the non-NOAEL code path)
    tocopherol = _mock_ingredient(
        "59-02-9", ["TOCOPHEROL"],
        dict(molecular_weight=430.71, log_pow=10.51, tpsa=29.46, melting_point=3.0),
        ["alpha-Tocopherol"],
        ["Antioxidant", "Skin conditioning"],
        [
            ToxIndicator(
                indicator="LOAEL", value=500, unit="mg/kg bw/day",
                route="oral", toxicity_type="repeated_dose_toxicity",
                ref="https://chem.echa.europa.eu/100.000.375"
            ),
        ],
    )
    # Persist every ingredient (MongoDB + PostgreSQL)
    for ing in [glycerin, cetyl, tocopherol]:
        mongo_id = ing.save()
        print(f"Ingrediente {ing.cas} ({ing.inci[0]}) salvato (mongo_id={mongo_id})")
    return glycerin, cetyl, tocopherol
def create_mock_order(preset, glycerin, cetyl, tocopherol):
    """Create a complete mock order end-to-end, bypassing the scraping pipeline.

    Steps: upsert the client, store the raw order JSON in MongoDB, register
    the order in PostgreSQL, advance its state to ARRICCHITO, then build and
    save the enriched Project with the supplied ingredients.

    Args:
        preset: Esposition preset used for the project.
        glycerin, cetyl, tocopherol: pre-saved mock Ingredient instances.

    Returns:
        The PostgreSQL id_ordine of the created order.
    """
    # 1. Upsert the client
    client_name = "Cosmetica Test Srl"
    id_cliente = upsert_cliente(client_name)
    print(f"Cliente '{client_name}' → id_cliente={id_cliente}")
    # 2. Raw order JSON as the intake pipeline would produce it
    raw_json = {
        "client_name": client_name,
        "product_name": "Crema Idratante Test",
        "preset_esposizione": preset.preset_name,
        "ingredients": [
            {"inci": "AQUA", "cas": "", "percentage": 70.0, "is_colorante": False, "skip_tox": True},
            {"inci": "GLYCERIN", "cas": "56-81-5", "percentage": 15.0, "is_colorante": False, "skip_tox": False},
            {"inci": "CETYL ALCOHOL", "cas": "36653-82-4", "percentage": 10.0, "is_colorante": False, "skip_tox": False},
            {"inci": "TOCOPHEROL", "cas": "59-02-9", "percentage": 5.0, "is_colorante": False, "skip_tox": False},
        ]
    }
    # 3. Store in the MongoDB 'orders' collection.
    # insert_one mutates its argument (adds _id), so pass a copy to keep
    # raw_json clean for any later use.
    orders_col = db_connect(collection_name='orders')
    result = orders_col.insert_one(raw_json.copy())
    uuid_ordine = str(result.inserted_id)
    print(f"Ordine salvato su MongoDB: uuid_ordine={uuid_ordine}")
    # 4. Register in the PostgreSQL 'ordini' table
    id_ordine = insert_ordine(uuid_ordine, id_cliente)
    print(f"Ordine inserito in PostgreSQL: id_ordine={id_ordine}")
    # 5. Advance the order state to ARRICCHITO (enriched)
    update_ordine_cliente(id_ordine, id_cliente)
    aggiorna_stato_ordine(id_ordine, int(StatoOrdine.ARRICCHITO))
    print(f"Stato ordine aggiornato a ARRICCHITO ({StatoOrdine.ARRICCHITO})")
    # 6. Build the project with the enriched ingredients.
    # AQUA has no CAS and skips toxicology, mirroring the real pipeline.
    project = Project(
        order_id=id_ordine,
        product_name="Crema Idratante Test",
        client_name=client_name,
        esposition=preset,
        ingredients=[
            ProjectIngredient(cas=None, inci="AQUA", percentage=70.0, skip_tox=True),
            ProjectIngredient(cas="56-81-5", inci="GLYCERIN", percentage=15.0, ingredient=glycerin),
            ProjectIngredient(cas="36653-82-4", inci="CETYL ALCOHOL", percentage=10.0, ingredient=cetyl),
            ProjectIngredient(cas="59-02-9", inci="TOCOPHEROL", percentage=5.0, ingredient=tocopherol),
        ]
    )
    # 7. Persist the project (MongoDB + PostgreSQL)
    uuid_progetto = project.save()
    print(f"Progetto salvato: uuid_progetto={uuid_progetto}")
    print("\n" + "=" * 60)
    print("MOCK ORDER CREATO CON SUCCESSO")
    print("=" * 60)
    print(f" id_ordine: {id_ordine}")
    print(f" uuid_ordine: {uuid_ordine}")
    print(f" uuid_progetto: {uuid_progetto}")
    print(f" cliente: {client_name}")
    # f-prefix removed on the two constant lines below: no placeholders.
    print(" prodotto: Crema Idratante Test")
    print(f" preset: {preset.preset_name}")
    print(" ingredienti: 4 (AQUA, GLYCERIN, CETYL ALCOHOL, TOCOPHEROL)")
    print(f" stato: ARRICCHITO ({StatoOrdine.ARRICCHITO})")
    print("=" * 60)
    return id_ordine
if __name__ == "__main__":
    print("Creazione ordine mock...")
    print()
    # Make sure the exposure preset exists, build the mock ingredients,
    # then assemble the full mock order from them.
    exposure_preset = ensure_preset_exists()
    mock_ingredients = create_mock_ingredients()
    create_mock_order(exposure_preset, *mock_ingredients)

View file

@ -127,6 +127,7 @@ class CosingInfo(BaseModel):
otherRestrictions : List[str] = Field(default_factory=list) otherRestrictions : List[str] = Field(default_factory=list)
cosmeticRestriction : Optional[str] = None cosmeticRestriction : Optional[str] = None
reference : Optional[str] = None reference : Optional[str] = None
substanceId : Optional[str] = None
sccsOpinionUrls : List[str] = Field(default_factory=list) sccsOpinionUrls : List[str] = Field(default_factory=list)
@classmethod @classmethod
@ -140,6 +141,7 @@ class CosingInfo(BaseModel):
'otherRestrictions', 'otherRestrictions',
'cosmeticRestriction', 'cosmeticRestriction',
'reference', 'reference',
'substanceId',
'inciName', 'inciName',
'sccsOpinionUrls' 'sccsOpinionUrls'
] ]
@ -185,6 +187,8 @@ class CosingInfo(BaseModel):
cosing_dict['cosmeticRestriction'] = cosing_data[k] cosing_dict['cosmeticRestriction'] = cosing_data[k]
if k == 'reference': if k == 'reference':
cosing_dict['reference'] = cosing_data[k] cosing_dict['reference'] = cosing_data[k]
if k == 'substanceId':
cosing_dict['substanceId'] = cosing_data[k]
if k == 'sccsOpinionUrls': if k == 'sccsOpinionUrls':
urls = [] urls = []
for url in cosing_data[k]: for url in cosing_data[k]:
@ -213,6 +217,7 @@ class ToxIndicator(BaseModel):
toxicity_type : Optional[str] = None toxicity_type : Optional[str] = None
ref : Optional[str] = None ref : Optional[str] = None
source : Optional[str] = None source : Optional[str] = None
is_custom : bool = False
@property @property
def priority_rank(self): def priority_rank(self):
@ -392,7 +397,10 @@ class Ingredient(BaseModel):
@classmethod @classmethod
def get_or_create(cls, cas: str, inci: Optional[List[str]] = None, force: bool = False): def get_or_create(cls, cas: str, inci: Optional[List[str]] = None, force: bool = False):
"""Restituisce l'ingrediente dalla cache se esiste e non è vecchio, altrimenti lo ricrea. """Restituisce l'ingrediente dalla cache se esiste e non è vecchio, altrimenti lo ricrea.
Se force=True, ignora la cache e riesegue lo scraping aggiornando il documento.""" Se force=True, ignora la cache e riesegue lo scraping aggiornando il documento.
Al re-scraping, i campi che risultano None vengono sostituiti con il valore cached
per evitare regressioni di dati in caso di fallimenti temporanei delle fonti esterne."""
cached = None
if not force: if not force:
cached = cls.from_cas(cas) cached = cls.from_cas(cas)
if cached and not cached.is_old(): if cached and not cached.is_old():
@ -405,6 +413,26 @@ class Ingredient(BaseModel):
logger.info(f"get_or_create CAS={cas}: force refresh") logger.info(f"get_or_create CAS={cas}: force refresh")
ingredient = cls.ingredient_builder(cas, inci=inci) ingredient = cls.ingredient_builder(cas, inci=inci)
if cached:
if ingredient.dap_info is None and cached.dap_info is not None:
logger.warning(f"get_or_create CAS={cas}: dap_info non ottenuto, mantengo dati cached")
ingredient.dap_info = cached.dap_info
if ingredient.cosing_info is None and cached.cosing_info is not None:
logger.warning(f"get_or_create CAS={cas}: cosing_info non ottenuto, mantengo dati cached")
ingredient.cosing_info = cached.cosing_info
if ingredient.toxicity is None and cached.toxicity is not None:
logger.warning(f"get_or_create CAS={cas}: toxicity non ottenuta, mantengo dati cached")
ingredient.toxicity = cached.toxicity
elif ingredient.toxicity is not None and cached.toxicity is not None:
custom_indicators = [i for i in cached.toxicity.indicators if i.is_custom]
if custom_indicators:
logger.info(f"get_or_create CAS={cas}: preservo {len(custom_indicators)} indicatori custom nel re-scraping")
ingredient.toxicity = Toxicity(
cas=ingredient.toxicity.cas,
indicators=ingredient.toxicity.indicators + custom_indicators
)
ingredient.save() ingredient.save()
return ingredient return ingredient
@ -452,6 +480,7 @@ class Ingredient(BaseModel):
def add_tox_indicator(self, indicator: ToxIndicator): def add_tox_indicator(self, indicator: ToxIndicator):
"""Aggiunge un indicatore tossicologico custom e ricalcola il best_case.""" """Aggiunge un indicatore tossicologico custom e ricalcola il best_case."""
indicator.is_custom = True
if self.toxicity is None: if self.toxicity is None:
self.toxicity = Toxicity(cas=self.cas, indicators=[indicator]) self.toxicity = Toxicity(cas=self.cas, indicators=[indicator])
else: else:

View file

@ -120,7 +120,8 @@ async def generate_project_source_pdfs(project, output_dir: str = "pdfs") -> lis
# --- Tox best_case PDF --- # --- Tox best_case PDF ---
best = ing.toxicity.best_case if ing.toxicity else None best = ing.toxicity.best_case if ing.toxicity else None
if best and best.ref: if best and best.ref:
pdf_name = f"{pi.cas}_{best.source}" if best.source else pi.cas source_label = best.source or best.toxicity_type or "tox"
pdf_name = f"{pi.cas}_{source_label}"
log.info(f"Generazione PDF tox: {pdf_name} da {best.ref}") log.info(f"Generazione PDF tox: {pdf_name} da {best.ref}")
success = await generate_pdf(best.ref, pdf_name) success = await generate_pdf(best.ref, pdf_name)
if success: if success:
@ -143,7 +144,7 @@ async def generate_project_source_pdfs(project, output_dir: str = "pdfs") -> lis
generated.append(pdf_path) generated.append(pdf_path)
continue continue
log.info(f"Download COSING PDF: {pdf_name} (ref={cosing.reference})") log.info(f"Download COSING PDF: {pdf_name} (reference={cosing.reference})")
content = cosing_download(cosing.reference) content = cosing_download(cosing.reference)
if isinstance(content, bytes): if isinstance(content, bytes):
with open(pdf_path, 'wb') as f: with open(pdf_path, 'wb') as f:

View file

@ -9,7 +9,7 @@ from playwright.sync_api import sync_playwright
from typing import Callable, Any from typing import Callable, Any
from pif_compiler.functions.common_log import get_logger from pif_compiler.functions.common_log import get_logger
from pif_compiler.functions.db_utils import db_connect, log_ricerche from pif_compiler.functions.db_utils import log_ricerche
log = get_logger() log = get_logger()
load_dotenv() load_dotenv()
@ -30,12 +30,12 @@ legislation = "&legislation=REACH"
def search_substance(cas : str) -> dict: def search_substance(cas : str) -> dict:
response = requests.get(BASE_SEARCH + cas) response = requests.get(BASE_SEARCH + cas)
if response.status_code != 200: if response.status_code != 200:
log.error(f"Network error: {response.status_code}") log.error(f"search_substance CAS={cas}: HTTP {response.status_code}")
return {} return {}
else: else:
response = response.json() response = response.json()
if response['state']['totalItems'] == 0: if response['state']['totalItems'] == 0:
log.info(f"No substance found for CAS {cas}") log.warning(f"search_substance CAS={cas}: nessuna sostanza trovata su ECHA")
return {} return {}
else: else:
for result in response['items']: for result in response['items']:
@ -47,9 +47,9 @@ def search_substance(cas : str) -> dict:
"rmlName": result["substanceIndex"]["rmlName"], "rmlName": result["substanceIndex"]["rmlName"],
"rmlId": result["substanceIndex"]["rmlId"] "rmlId": result["substanceIndex"]["rmlId"]
} }
log.info(f"Substance found for CAS {cas}: {substance['rmlName']}") log.debug(f"search_substance CAS={cas}: trovata '{substance['rmlName']}'")
return substance return substance
log.error(f"Something went wrong searching the substance for CAS {cas}") log.warning(f"search_substance CAS={cas}: {response['state']['totalItems']} risultati ma nessun match esatto sul CAS")
return {} return {}
@ -57,14 +57,16 @@ def get_dossier_info(rmlId: str, type = active) -> dict:
url = BASE_DOSSIER + rmlId + type + legislation url = BASE_DOSSIER + rmlId + type + legislation
response_dossier = requests.get(url) response_dossier = requests.get(url)
if response_dossier.status_code != 200: if response_dossier.status_code != 200:
log.error(f"Network error: {response_dossier.status_code}") log.error(f"get_dossier_info rmlId={rmlId}: HTTP {response_dossier.status_code}")
return {} return {}
response_dossier_json = response_dossier.json() response_dossier_json = response_dossier.json()
if response_dossier_json['state']['totalItems'] == 0: if response_dossier_json['state']['totalItems'] == 0:
log.info(f"No dossier found for RML ID {rmlId}")
if type == active: if type == active:
log.debug(f"get_dossier_info rmlId={rmlId}: nessun dossier attivo, provo inattivi")
return get_dossier_info(rmlId, inactive) return get_dossier_info(rmlId, inactive)
log.warning(f"get_dossier_info rmlId={rmlId}: nessun dossier trovato (né attivo né inattivo)")
return {} return {}
dossier_info = {}
for dossier in response_dossier_json['items']: for dossier in response_dossier_json['items']:
if dossier['reachDossierInfo']['dossierSubtype'] == "Article 10 - full" and dossier['reachDossierInfo']['registrationRole'] == "Lead (joint submission)": if dossier['reachDossierInfo']['dossierSubtype'] == "Article 10 - full" and dossier['reachDossierInfo']['registrationRole'] == "Lead (joint submission)":
dossier_info = { dossier_info = {
@ -75,7 +77,8 @@ def get_dossier_info(rmlId: str, type = active) -> dict:
"assetExternalId": dossier['assetExternalId'], "assetExternalId": dossier['assetExternalId'],
"rootKey": dossier['rootKey'] "rootKey": dossier['rootKey']
} }
log.info(f"Dossier info retrieved for RML ID {rmlId}") if not dossier_info:
log.warning(f"get_dossier_info rmlId={rmlId}: nessun dossier 'Article 10 - full / Lead' tra i {response_dossier_json['state']['totalItems']} trovati")
return dossier_info return dossier_info
@ -85,7 +88,7 @@ def get_substance_index(assetExternalId : str) -> dict:
response = requests.get(INDEX + "/index.html") response = requests.get(INDEX + "/index.html")
if response.status_code != 200: if response.status_code != 200:
log.error(f"Network error: {response.status_code}") log.error(f"get_substance_index {assetExternalId}: HTTP {response.status_code}")
return {} return {}
soup = BeautifulSoup(response.content, 'html.parser') soup = BeautifulSoup(response.content, 'html.parser')
@ -98,7 +101,7 @@ def get_substance_index(assetExternalId : str) -> dict:
txi_href = txi_link['href'] txi_href = txi_link['href']
index_data['toxicological_information_link'] = LINK_DOSSIER + txi_href + '.html' index_data['toxicological_information_link'] = LINK_DOSSIER + txi_href + '.html'
except Exception as e: except Exception as e:
log.error(f"Error retrieving toxicological information link: {e}") log.warning(f"get_substance_index: link tossicologia non trovato — {e}")
index_data['toxicological_information_link'] = None index_data['toxicological_information_link'] = None
# Repeated dose toxicity : rdt # Repeated dose toxicity : rdt
@ -108,7 +111,7 @@ def get_substance_index(assetExternalId : str) -> dict:
rdt_href = rdt_link['href'] rdt_href = rdt_link['href']
index_data['repeated_dose_toxicity_link'] = LINK_DOSSIER + rdt_href + '.html' index_data['repeated_dose_toxicity_link'] = LINK_DOSSIER + rdt_href + '.html'
except Exception as e: except Exception as e:
log.error(f"Error retrieving repeated dose toxicity link: {e}") log.warning(f"get_substance_index: link repeated dose non trovato — {e}")
index_data['repeated_dose_toxicity_link'] = None index_data['repeated_dose_toxicity_link'] = None
# Acute toxicity : at # Acute toxicity : at
@ -118,11 +121,9 @@ def get_substance_index(assetExternalId : str) -> dict:
at_href = at_link['href'] at_href = at_link['href']
index_data['acute_toxicity_link'] = LINK_DOSSIER + at_href + '.html' index_data['acute_toxicity_link'] = LINK_DOSSIER + at_href + '.html'
except Exception as e: except Exception as e:
log.error(f"Error retrieving acute toxicity link: {e}") log.warning(f"get_substance_index: link acute toxicity non trovato — {e}")
index_data['acute_toxicity_link'] = None index_data['acute_toxicity_link'] = None
log.info(f"Substance index retrieved for Asset External ID {assetExternalId}")
return index_data return index_data
@ -429,8 +430,8 @@ def echa_flow(cas) -> dict:
substance = search_substance(cas) substance = search_substance(cas)
dossier_info = get_dossier_info(substance['rmlId']) dossier_info = get_dossier_info(substance['rmlId'])
index = get_substance_index(dossier_info['assetExternalId']) index = get_substance_index(dossier_info['assetExternalId'])
except Exception as e: except KeyError as e:
log.error(f"Error in ECHA flow for CAS {cas}: {e}") log.error(f"echa_flow CAS={cas}: chiave mancante nella risposta ECHA — {e}")
return {} return {}
result = { result = {
@ -442,14 +443,14 @@ def echa_flow(cas) -> dict:
"repeated_dose_toxicity": {} "repeated_dose_toxicity": {}
} }
log.debug(f"ECHA flow intermediate result")
# Fetch and parse toxicological information # Fetch and parse toxicological information
txi_link = index.get('toxicological_information_link') txi_link = index.get('toxicological_information_link')
if txi_link: if txi_link:
response_summary = requests.get(txi_link) response_summary = requests.get(txi_link)
if response_summary.status_code == 200: if response_summary.status_code == 200:
result['toxicological_information'] = parse_toxicology_html(response_summary.content) result['toxicological_information'] = parse_toxicology_html(response_summary.content)
else:
log.warning(f"echa_flow CAS={cas}: tossicologia HTTP {response_summary.status_code}")
# Fetch and parse acute toxicity # Fetch and parse acute toxicity
at_link = index.get('acute_toxicity_link') at_link = index.get('acute_toxicity_link')
@ -457,6 +458,8 @@ def echa_flow(cas) -> dict:
response_acute = requests.get(at_link) response_acute = requests.get(at_link)
if response_acute.status_code == 200: if response_acute.status_code == 200:
result['acute_toxicity'] = parse_toxicology_html(response_acute.content) result['acute_toxicity'] = parse_toxicology_html(response_acute.content)
else:
log.warning(f"echa_flow CAS={cas}: acute toxicity HTTP {response_acute.status_code}")
# Fetch and parse repeated dose toxicity # Fetch and parse repeated dose toxicity
rdt_link = index.get('repeated_dose_toxicity_link') rdt_link = index.get('repeated_dose_toxicity_link')
@ -464,86 +467,41 @@ def echa_flow(cas) -> dict:
response_repeated = requests.get(rdt_link) response_repeated = requests.get(rdt_link)
if response_repeated.status_code == 200: if response_repeated.status_code == 200:
result['repeated_dose_toxicity'] = parse_toxicology_html(response_repeated.content) result['repeated_dose_toxicity'] = parse_toxicology_html(response_repeated.content)
for key, value in result.items():
if value is None or value == "" or value == [] or value == {}:
log.warning(f"Missing data for key: {key} in CAS {cas}")
else: else:
log.info(f"Data retrieved for key: {key} in CAS {cas}") log.warning(f"echa_flow CAS={cas}: repeated dose HTTP {response_repeated.status_code}")
txi_ok = bool(result['toxicological_information'])
at_ok = bool(result['acute_toxicity'])
rdt_ok = bool(result['repeated_dose_toxicity'])
log.info(f"echa_flow CAS={cas}: txi={'OK' if txi_ok else '-'}, acute={'OK' if at_ok else '-'}, rdt={'OK' if rdt_ok else '-'}")
return result return result
def cas_validation(cas: str) -> str: def cas_validation(cas: str) -> str:
log.info(f"Starting ECHA data extraction for CAS: {cas}")
if cas is None or cas.strip() == "": if cas is None or cas.strip() == "":
log.error("No CAS number provided.") log.error("cas_validation: CAS vuoto o None")
return None return None
cas_stripped = cas.replace("-", "") cas_stripped = cas.replace("-", "")
if cas_stripped.isdigit() and len(cas_stripped) <= 12: if cas_stripped.isdigit() and len(cas_stripped) <= 12:
log.info(f"CAS number {cas} maybe is valid.")
return cas.strip() return cas.strip()
else: log.error(f"cas_validation: CAS '{cas}' non valido (formato non riconosciuto)")
log.error(f"CAS number {cas} is not valid.") return None
return None
def check_local(cas: str) -> bool:
collection = db_connect()
if collection is None:
log.error("No MongoDB collection available.")
return None
record = collection.find_one({"substance.rmlCas": cas})
if record:
log.info(f"Record for CAS {cas} found in local database.")
return record
else:
log.info(f"No record for CAS {cas} found in local database.")
return None
def add_to_local(data: dict) -> bool:
collection = db_connect()
if collection is None:
log.error("No MongoDB collection available.")
return False
try:
collection.insert_one(data)
log.info(f"Data for CAS {data['substance']['rmlCas']} added to local database.")
return True
except Exception as e:
log.error(f"Error inserting data into MongoDB: {e}")
return False
def orchestrator(cas: str) -> dict: def orchestrator(cas: str) -> dict:
log.debug(f"Initiating search for CAS {cas} in ECHA service.") log.debug(f"ECHA orchestrator CAS={cas}")
cas_validated = cas_validation(cas) cas_validated = cas_validation(cas)
if not cas_validated: if not cas_validated:
return None return None
else:
log.info(f"CAS {cas} validated successfully.")
local_record = check_local(cas_validated)
if local_record:
log.info(f"Returning local record for CAS {cas}.")
log_ricerche(cas, 'ECHA', True)
return local_record
else:
log.info(f"No local record, starting echa flow")
echa_data = echa_flow(cas_validated)
if echa_data:
log.info(f"Echa flow successful")
log_ricerche(cas, 'ECHA', True)
add_to_local(echa_data)
return echa_data
else:
log.error(f"Failed to retrieve ECHA data for CAS {cas}.")
log_ricerche(cas, 'ECHA', False)
return None
# to do: check if document is complete echa_data = echa_flow(cas_validated)
# to do: check lastupdate if echa_data:
log.info(f"ECHA CAS={cas}: completato")
log_ricerche(cas, 'ECHA', True)
return echa_data
else:
log.error(f"ECHA CAS={cas}: nessun dato recuperato")
log_ricerche(cas, 'ECHA', False)
return None
#endregion #endregion
if __name__ == "__main__": if __name__ == "__main__":