cosmoguard-bd/src/pif_compiler/classes/models.py
2026-03-09 21:26:33 +01:00

653 lines
No EOL
25 KiB
Python

from pydantic import BaseModel, Field, field_validator, ConfigDict, model_validator, computed_field
import re
from enum import IntEnum
from typing import List, Optional
from datetime import datetime as dt
from pif_compiler.functions.common_log import get_logger
logger = get_logger()
class StatoOrdine(IntEnum):
    """Order states orchestrating the PIF processing flow."""
    RICEVUTO = 1      # Raw input received, loaded into MongoDB
    VALIDATO = 2      # Object created (compiler, client, cosmetic type OK)
    COMPILAZIONE = 3  # The Progetto object has been created and is being enriched
    ARRICCHITO = 5    # Progetto object is finalized
    CALCOLO = 6       # DAP, SED, MoS computation in progress
    COMPLETATO = 8    # PIF finalized
    ERRORE = 9        # Error during processing, human intervention required
from pif_compiler.services.srv_echa import extract_levels, at_extractor, rdt_extractor, orchestrator
from pif_compiler.functions.db_utils import postgres_connect, upsert_ingrediente, get_ingrediente_by_cas
from pif_compiler.services.srv_pubchem import pubchem_dap
from pif_compiler.services.srv_cosing import cosing_entry
class DapInfo(BaseModel):
    """Physico-chemical data for a CAS number, used to derive the dermal
    absorption (DAP) value per the SCCS-style criteria in ``compute_dap``."""
    cas: str
    molecular_weight: Optional[float] = Field(default=None, description="In Daltons (Da)")
    high_ionization: Optional[float] = Field(default=None, description="High degree of ionization")
    log_pow: Optional[float] = Field(default=None, description="Partition coefficient")
    tpsa: Optional[float] = Field(default=None, description="Topological polar surface area")
    melting_point: Optional[float] = Field(default=None, description="In Celsius (°C)")
    # Computed DAP value: defaults to 0.5 (50%), overwritten by the validator.
    dap_value: float = 0.5

    @model_validator(mode='after')
    def compute_dap(self):
        """Set dap_value to 0.1 if any absorption-reducing condition holds, else 0.5."""
        # Each entry is True when the condition reduces dermal absorption.
        conditions = []
        # 1. MW > 500 Da
        if self.molecular_weight is not None:
            conditions.append(self.molecular_weight > 500)
        # 2. High ionization (True would reduce absorption).
        # NOTE(review): dap_builder stores a float pKa here, so `is True` can
        # never hold and this condition is effectively dead. Kept unchanged
        # pending a domain-confirmed pKa threshold — TODO confirm with SCCS
        # Notes of Guidance before altering DAP results.
        if self.high_ionization is not None:
            conditions.append(self.high_ionization is True)
        # 3. Log Pow <= -1 OR >= 4
        if self.log_pow is not None:
            conditions.append(self.log_pow <= -1 or self.log_pow >= 4)
        # 4. TPSA > 120 Å2
        if self.tpsa is not None:
            conditions.append(self.tpsa > 120)
        # 5. Melting point > 200°C
        if self.melting_point is not None:
            conditions.append(self.melting_point > 200)
        # At least one "safe" condition -> DAP 0.1, otherwise default 0.5.
        self.dap_value = 0.1 if any(conditions) else 0.5
        return self

    @classmethod
    def dap_builder(cls, dap_data: dict):
        """Build a DapInfo from raw PubChem-style data.

        Unknown keys are ignored. Parsing failures of the free-text fields
        (melting point, dissociation constants) are logged and skipped so a
        partial record still yields an object.
        """
        # BUGFIX(idiom): the original accumulated fields into a local named
        # `dict`, shadowing the builtin; renamed and dispatch simplified.
        fields = {}
        if 'CAS' in dap_data:
            fields['cas'] = dap_data['CAS']
        if 'MolecularWeight' in dap_data:
            fields['molecular_weight'] = float(dap_data['MolecularWeight'])
        if 'XLogP' in dap_data:
            fields['log_pow'] = float(dap_data['XLogP'])
        if 'TPSA' in dap_data:
            fields['tpsa'] = float(dap_data['TPSA'])
        # Melting point arrives as a list of {'Value': ...} entries; keep the
        # first number found in each °C entry (later entries overwrite, as in
        # the original code).
        if 'Melting Point' in dap_data:
            try:
                for item in dap_data['Melting Point']:
                    if '°C' in item['Value']:
                        nums = re.findall(r"[-+]?\d*\.\d+|\d+", item['Value'])
                        if nums:
                            fields['melting_point'] = float(nums[0])
            except Exception as e:
                logger.warning(f"DapInfo: parsing melting_point fallito per CAS={fields.get('cas', '?')}: {e}")
        # Dissociation constants: take the numeric pKa value (a float, see the
        # NOTE in compute_dap about how it is consumed).
        if 'Dissociation Constants' in dap_data:
            try:
                for item in dap_data['Dissociation Constants']:
                    if 'pKa' in item['Value']:
                        nums = re.findall(r"[-+]?\d*\.\d+|\d+", item['Value'])
                        if nums:
                            fields['high_ionization'] = float(nums[0])
            except Exception as e:
                logger.warning(f"DapInfo: parsing dissociation fallito per CAS={fields.get('cas', '?')}: {e}")
        return cls(**fields)
class CosingInfo(BaseModel):
    """One COSING (EU cosmetic ingredients database) entry for an ingredient."""
    cas: List[str] = Field(default_factory=list)
    common_names: List[str] = Field(default_factory=list)
    inci: List[str] = Field(default_factory=list)
    annex: List[str] = Field(default_factory=list)
    functionName: List[str] = Field(default_factory=list)
    otherRestrictions: List[str] = Field(default_factory=list)
    cosmeticRestriction: Optional[str] = None
    reference: Optional[str] = None
    substanceId: Optional[str] = None
    sccsOpinionUrls: List[str] = Field(default_factory=list)

    @classmethod
    def cosing_builder(cls, cosing_data: dict):
        """Build a CosingInfo from one raw COSING record; unknown keys are ignored."""
        payload = {}
        # Plain list fields: copy as-is under the model's field name.
        list_fields = {
            'nameOfCommonIngredientsGlossary': 'common_names',
            'inciName': 'inci',
            'casNo': 'cas',
            'functionName': 'functionName',
            'otherRestrictions': 'otherRestrictions',
            'sccsOpinionUrls': 'sccsOpinionUrls',
        }
        for src, dst in list_fields.items():
            if src in cosing_data:
                payload[dst] = list(cosing_data[src])
        # Scalar fields: copied verbatim.
        for scalar in ('cosmeticRestriction', 'reference', 'substanceId'):
            if scalar in cosing_data:
                payload[scalar] = cosing_data[scalar]
        if 'annexNo' in cosing_data:
            # Pair each annex number with its reference number.
            # BUGFIX: the original indexed cosing_data['refNo'][i] positionally,
            # raising KeyError when 'refNo' was absent and IndexError when it
            # was shorter than 'annexNo'; zip over a safe default avoids both
            # (identical output when the lists match, as they did before).
            refs = cosing_data.get('refNo', [])
            payload['annex'] = [f"{ann} / {ref}" for ann, ref in zip(cosing_data['annexNo'], refs)]
        return cls(**payload)

    @classmethod
    def cycle_identified(cls, cosing_data: dict):
        """Build a list of CosingInfo: any 'identifiedIngredient' sub-entries
        first, then the main record itself (always last)."""
        cosing_entries = []
        if 'identifiedIngredient' in cosing_data.keys():
            for each_entry in cosing_data['identifiedIngredient']:
                identified_cosing = cls.cosing_builder(each_entry)
                cosing_entries.append(identified_cosing)
        main = cls.cosing_builder(cosing_data)
        cosing_entries.append(main)
        return cosing_entries
class ToxIndicator(BaseModel):
    """A single toxicological endpoint (e.g. NOAEL, LD50) with its value,
    unit, exposure route and provenance."""
    indicator: str
    value: float
    unit: str
    route: str
    toxicity_type: Optional[str] = None
    ref: Optional[str] = None
    source: Optional[str] = None
    # True when the indicator was added manually rather than scraped.
    is_custom: bool = False

    @property
    def priority_rank(self):
        """Numerical priority of this indicator (higher wins); -1 if unknown."""
        if self.indicator in ('LD50', 'DL50'):
            return 1
        if self.indicator == 'LOAEL':
            return 3
        if self.indicator == 'NOAEL':
            return 4
        return -1

    @property
    def factor(self):
        """Safety factor associated with the indicator's priority rank."""
        return {1: 10, 3: 3}.get(self.priority_rank, 1)
class Toxicity(BaseModel):
    """Toxicological profile for a CAS: all extracted indicators plus the
    automatically selected 'best case' indicator and its safety factor."""
    cas: str
    indicators: list[ToxIndicator]
    # Both selected/derived by the validator below.
    best_case: Optional[ToxIndicator] = None
    factor: Optional[int] = None

    @model_validator(mode='after')
    def set_best_case(self) -> 'Toxicity':
        """Pick the most relevant indicator: rank, then route, then lowest value."""
        if not self.indicators:
            return self
        # 1. Pick highest priority rank (NOAEL=4 > LOAEL=3 > LD50=1)
        max_rank = max(x.priority_rank for x in self.indicators)
        top = [x for x in self.indicators if x.priority_rank == max_rank]
        # 2. Tiebreak by route preference: dermal > oral > inhalation > other
        def _route_order(ind: ToxIndicator) -> int:
            r = ind.route.lower()
            if "dermal" in r:
                return 1
            if "oral" in r:
                return 2
            if "inhalation" in r:
                return 3
            return 4
        best_route = min(_route_order(x) for x in top)
        top = [x for x in top if _route_order(x) == best_route]
        # 3. For NOAEL/LOAEL, pick the lowest value (most conservative)
        if top[0].indicator in ('NOAEL', 'LOAEL'):
            self.best_case = min(top, key=lambda x: x.value)
        else:
            self.best_case = top[0]
        self.factor = self.best_case.factor
        return self

    @classmethod
    def from_result(cls, cas: str, result):
        """Build a Toxicity from the raw ECHA orchestrator result dict.

        Extraction failures for one toxicity type are logged and skipped so a
        partial result still yields an object (possibly with no indicators).
        """
        toxicity_types = ['repeated_dose_toxicity', 'acute_toxicity']
        indicators_list = []
        for tt in toxicity_types:
            if tt not in result:
                logger.debug(f"Toxicity CAS={cas}: nessun dato per {tt}")
                continue
            try:
                extractor = at_extractor if tt == 'acute_toxicity' else rdt_extractor
                fetch = extract_levels(result[tt], extractor=extractor)
                if not fetch:
                    logger.warning(f"Toxicity CAS={cas}: {tt} presente ma nessun indicatore estratto")
                    continue
                # BUGFIX: result.get("index") can be None; the original then
                # raised AttributeError, which the broad except below swallowed,
                # silently dropping every indicator of this toxicity type.
                links = result.get("index") or {}
                link = links.get(f"{tt}_link", "")
                for key, lvl in fetch.items():
                    lvl['ref'] = link
                    lvl['source'] = tt
                    elem = ToxIndicator(**lvl)
                    indicators_list.append(elem)
            except Exception as e:
                logger.error(f"Toxicity.from_result CAS={cas}: estrazione {tt} fallita: {e}")
                continue
        return cls(
            cas=cas,
            indicators=indicators_list
        )
class Ingredient(BaseModel):
    """Aggregated ingredient record keyed by CAS number.

    Bundles the DAP (dermal absorption) data, the COSING entries and the
    toxicological profile. Persisted on MongoDB (collection 'ingredients')
    with a PostgreSQL row acting as a CAS -> mongo_id index.
    """
    cas: str
    inci: Optional[List[str]] = None
    dap_info: Optional[DapInfo] = None
    cosing_info: Optional[List[CosingInfo]] = None
    toxicity: Optional[Toxicity] = None
    # ISO-8601 timestamp, filled in by set_creation_date on first validation.
    creation_date: Optional[str] = None

    @classmethod
    def ingredient_builder(cls, cas: str, inci: Optional[List[str]] = None):
        """Scrape PubChem, COSING and ECHA for `cas` and build a fresh Ingredient.

        Each missing source is logged as a warning and left as None rather
        than aborting the build.
        """
        logger.info(f"ingredient_builder CAS={cas}: inizio scraping")
        dap_data = pubchem_dap(cas)
        dap_info = DapInfo.dap_builder(dap_data) if isinstance(dap_data, dict) else None
        if not dap_info:
            logger.warning(f"CAS={cas}: nessun dato DAP da PubChem")
        cosing_data = cosing_entry(cas)
        cosing_info = CosingInfo.cycle_identified(cosing_data) if cosing_data else None
        if not cosing_info:
            logger.warning(f"CAS={cas}: nessun dato COSING")
        toxicity_data = orchestrator(cas)
        toxicity = Toxicity.from_result(cas, toxicity_data) if toxicity_data else None
        if not toxicity or not toxicity.indicators:
            logger.warning(f"CAS={cas}: nessun indicatore tossicologico trovato")
        logger.info(f"CAS={cas}: scraping completato (dap={'OK' if dap_info else '-'}, cosing={'OK' if cosing_info else '-'}, tox={len(toxicity.indicators) if toxicity else 0} ind.)")
        return cls(
            cas=cas,
            inci=inci,
            dap_info=dap_info,
            cosing_info=cosing_info,
            toxicity=toxicity
        )

    @model_validator(mode='after')
    def set_creation_date(self) -> 'Ingredient':
        """Stamp the creation date once, the first time the model is validated."""
        if self.creation_date is None:
            # NOTE(review): naive local time — confirm whether a tz-aware
            # timestamp is needed before changing stored data.
            self.creation_date = dt.now().isoformat()
        return self

    def update_ingredient(self, attr: str, data: dict):
        """Overwrite attribute `attr` with `data` (no validation is re-run)."""
        setattr(self, attr, data)

    def to_mongo_dict(self):
        """Serialize the model to a plain dict suitable for MongoDB storage."""
        mongo_dict = self.model_dump()
        return mongo_dict

    def save(self):
        """Salva l'ingrediente su MongoDB (collection 'ingredients') e crea/aggiorna la riga in PostgreSQL."""
        from pif_compiler.functions.db_utils import db_connect
        collection = db_connect(collection_name='ingredients')
        if collection is None:
            logger.error(f"Ingredient.save CAS={self.cas}: connessione MongoDB fallita")
            return None
        mongo_dict = self.to_mongo_dict()
        # Upsert keyed on CAS so repeated saves replace the same document.
        result = collection.replace_one(
            {"cas": self.cas},
            mongo_dict,
            upsert=True
        )
        if result.upserted_id:
            mongo_id = str(result.upserted_id)
        else:
            # Replaced an existing document: fetch its _id for the PG index row.
            doc = collection.find_one({"cas": self.cas}, {"_id": 1})
            mongo_id = str(doc["_id"])
        has_dap = self.dap_info is not None
        has_cosing = self.cosing_info is not None
        has_tox = self.toxicity is not None
        upsert_ingrediente(self.cas, mongo_id, dap=has_dap, cosing=has_cosing, tox=has_tox)
        logger.debug(f"Ingredient.save CAS={self.cas}: mongo_id={mongo_id}")
        return mongo_id

    @classmethod
    def from_cas(cls, cas: str):
        """Recupera un ingrediente da MongoDB tramite il CAS, usando PostgreSQL come indice."""
        from pif_compiler.functions.db_utils import db_connect
        from bson import ObjectId
        pg_entry = get_ingrediente_by_cas(cas)
        if not pg_entry:
            return None
        _, _, mongo_id, _, _, _ = pg_entry
        if not mongo_id:
            logger.warning(f"from_cas CAS={cas}: presente in PG ma mongo_id è NULL")
            return None
        collection = db_connect(collection_name='ingredients')
        # BUGFIX: db_connect can return None (handled in save()); the original
        # crashed with AttributeError on find_one instead of degrading to None.
        if collection is None:
            logger.error(f"from_cas CAS={cas}: connessione MongoDB fallita")
            return None
        doc = collection.find_one({"_id": ObjectId(mongo_id)})
        if not doc:
            logger.warning(f"from_cas CAS={cas}: mongo_id={mongo_id} non trovato in MongoDB")
            return None
        doc.pop("_id", None)
        return cls(**doc)

    @classmethod
    def get_or_create(cls, cas: str, inci: Optional[List[str]] = None, force: bool = False):
        """Restituisce l'ingrediente dalla cache se esiste e non è vecchio, altrimenti lo ricrea.
        Se force=True, ignora la cache e riesegue lo scraping aggiornando il documento.
        Al re-scraping, i campi che risultano None vengono sostituiti con il valore cached
        per evitare regressioni di dati in caso di fallimenti temporanei delle fonti esterne."""
        cached = None
        if not force:
            cached = cls.from_cas(cas)
            if cached and not cached.is_old():
                logger.debug(f"get_or_create CAS={cas}: cache hit")
                return cached
        if cached:
            logger.info(f"get_or_create CAS={cas}: cache scaduta, re-scraping")
        else:
            # NOTE(review): this branch also fires on a plain cache miss (not
            # force), so the "force refresh" message can be misleading.
            logger.info(f"get_or_create CAS={cas}: force refresh")
        ingredient = cls.ingredient_builder(cas, inci=inci)
        if cached:
            # Backfill from cache: never let a transient scraping failure
            # erase data we already had.
            if ingredient.dap_info is None and cached.dap_info is not None:
                logger.warning(f"get_or_create CAS={cas}: dap_info non ottenuto, mantengo dati cached")
                ingredient.dap_info = cached.dap_info
            if ingredient.cosing_info is None and cached.cosing_info is not None:
                logger.warning(f"get_or_create CAS={cas}: cosing_info non ottenuto, mantengo dati cached")
                ingredient.cosing_info = cached.cosing_info
            if ingredient.toxicity is None and cached.toxicity is not None:
                logger.warning(f"get_or_create CAS={cas}: toxicity non ottenuta, mantengo dati cached")
                ingredient.toxicity = cached.toxicity
            elif ingredient.toxicity is not None and cached.toxicity is not None:
                # Manually-added indicators survive a re-scrape; rebuilding the
                # Toxicity object re-runs best_case selection over the union.
                custom_indicators = [i for i in cached.toxicity.indicators if i.is_custom]
                if custom_indicators:
                    logger.info(f"get_or_create CAS={cas}: preservo {len(custom_indicators)} indicatori custom nel re-scraping")
                    ingredient.toxicity = Toxicity(
                        cas=ingredient.toxicity.cas,
                        indicators=ingredient.toxicity.indicators + custom_indicators
                    )
        ingredient.save()
        return ingredient

    def get_stats(self):
        """Return a flat dict of boolean/count completeness stats for this record."""
        stats = {
            "has_dap_info": self.dap_info is not None,
            "has_cosing_info": self.cosing_info is not None,
            "has_toxicity_info": self.toxicity is not None,
            "num_tox_indicators": len(self.toxicity.indicators) if self.toxicity else 0,
            "has_best_tox_indicator": self.toxicity.best_case is not None if self.toxicity else False,
            # BUGFIX: the original inspected only cosing_info[0]; scan every
            # entry, consistently with return_cosing_restrictions below.
            "has_restrictions_in_cosing": any(c.annex for c in self.cosing_info) if self.cosing_info else False,
            "has_noael_indicator": any(ind.indicator == 'NOAEL' for ind in self.toxicity.indicators) if self.toxicity else False,
            "has_ld50_indicator": any(ind.indicator == 'LD50' for ind in self.toxicity.indicators) if self.toxicity else False,
            "has_loael_indicator": any(ind.indicator == 'LOAEL' for ind in self.toxicity.indicators) if self.toxicity else False
        }
        return stats

    def is_old(self, threshold_days: int = 365) -> bool:
        """True when the record is older than `threshold_days` (or undated)."""
        if not self.creation_date:
            return True
        creation_dt = dt.fromisoformat(self.creation_date)
        current_dt = dt.now()
        delta = current_dt - creation_dt
        return delta.days > threshold_days

    def add_inci_name(self, inci_name: str):
        """Record an INCI synonym for this ingredient, avoiding duplicates."""
        if self.inci is None:
            self.inci = []
        if inci_name not in self.inci:
            self.inci.append(inci_name)

    def return_best_toxicity(self) -> Optional[ToxIndicator]:
        """Return the selected best-case indicator, or None if unavailable."""
        if self.toxicity and self.toxicity.best_case:
            return self.toxicity.best_case
        return None

    def return_cosing_restrictions(self) -> List[str]:
        """Collect the annex/restriction strings from every COSING entry."""
        restrictions = []
        if self.cosing_info:
            for cosing in self.cosing_info:
                restrictions.extend(cosing.annex)
        return restrictions

    def add_tox_indicator(self, indicator: ToxIndicator):
        """Aggiunge un indicatore tossicologico custom e ricalcola il best_case."""
        indicator.is_custom = True
        # Rebuilding the Toxicity object re-runs its validator, which
        # recomputes best_case and factor over the new indicator set.
        if self.toxicity is None:
            self.toxicity = Toxicity(cas=self.cas, indicators=[indicator])
        else:
            new_indicators = self.toxicity.indicators + [indicator]
            self.toxicity = Toxicity(cas=self.cas, indicators=new_indicators)
        self.save()
class Esposition(BaseModel):
    """Exposure preset for a cosmetic product type (inputs for the SED
    calculation), persisted in the PostgreSQL table `tipi_prodotti`."""
    preset_name: str
    tipo_prodotto: str
    popolazione_target: str = "Adulti"
    peso_target_kg: float = 60.0
    luogo_applicazione: str
    esp_normali: List[str]
    esp_secondarie: List[str]
    esp_nano: List[str]
    sup_esposta: int = Field(ge=1, le=20000, description="Area di applicazione in cm2")
    freq_applicazione: float = Field(default=1, description="Numero di applicazioni al giorno")
    qta_giornaliera: float = Field(..., description="Quantità di prodotto applicata (g/die)")
    ritenzione: float = Field(default=1.0, ge=0.01, le=1.0, description="Fattore di ritenzione")
    note: Optional[str] = None

    @field_validator('esp_normali', 'esp_secondarie', 'esp_nano', mode='before')
    @classmethod
    def parse_postgres_array(cls, v):
        """Accept Postgres array literals ('{a,b}') as well as real lists."""
        if isinstance(v, str):
            cleaned = v.strip('{}[]')
            return [item.strip() for item in cleaned.split(',')] if cleaned else []
        return v

    @computed_field
    @property
    def esposizione_calcolata(self) -> float:
        """Daily retained amount (g/die): applied quantity x retention factor."""
        return self.qta_giornaliera * self.ritenzione

    @computed_field
    @property
    def esposizione_relativa(self) -> float:
        """Relative daily exposure (mg per kg body weight per day)."""
        return (self.esposizione_calcolata * 1000) / self.peso_target_kg

    def save_to_postgres(self):
        """Insert this preset into `tipi_prodotti`.

        Returns the new id_preset, None if the INSERT returned no row, or
        False on error.
        """
        data = self.model_dump(mode='json')
        query = """INSERT INTO tipi_prodotti (
            preset_name, tipo_prodotto, luogo_applicazione,
            esp_normali, esp_secondarie, esp_nano,
            sup_esposta, freq_applicazione, qta_giornaliera, ritenzione
        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) RETURNING id_preset;"""
        conn = postgres_connect()
        # BUGFIX: without this guard a failed connection (None, presumably —
        # mirrors db_connect's behaviour in Ingredient.save) crashed with
        # AttributeError in rollback()/close() instead of returning False.
        if conn is None:
            logger.error(f"Esposition.save_to_postgres '{self.preset_name}': connessione PostgreSQL fallita")
            return False
        try:
            with conn.cursor() as cur:
                cur.execute(query, (
                    data.get("preset_name"), data.get("tipo_prodotto"),
                    data.get("luogo_applicazione"), data.get("esp_normali"),
                    data.get("esp_secondarie"), data.get("esp_nano"),
                    data.get("sup_esposta"), data.get("freq_applicazione"),
                    data.get("qta_giornaliera"), data.get("ritenzione")
                ))
                result = cur.fetchone()
                conn.commit()
                return result[0] if result else None
        except Exception as e:
            logger.error(f"Esposition.save_to_postgres '{self.preset_name}': {e}")
            conn.rollback()
            return False
        finally:
            conn.close()

    @classmethod
    def get_presets(cls):
        """Load every exposure preset from `tipi_prodotti`; [] on error."""
        conn = postgres_connect()
        # BUGFIX: guard against a failed connection (see save_to_postgres).
        if conn is None:
            logger.error("Esposition.get_presets: connessione PostgreSQL fallita")
            return []
        try:
            with conn.cursor() as cur:
                cur.execute("SELECT preset_name, tipo_prodotto, luogo_applicazione, esp_normali, esp_secondarie, esp_nano, sup_esposta, freq_applicazione, qta_giornaliera, ritenzione FROM tipi_prodotti;")
                results = cur.fetchall()
                lista_oggetti = []
                for r in results:
                    obj = cls(
                        preset_name=r[0],
                        tipo_prodotto=r[1],
                        luogo_applicazione=r[2],
                        esp_normali=r[3],
                        esp_secondarie=r[4],
                        esp_nano=r[5],
                        sup_esposta=r[6],
                        freq_applicazione=r[7],
                        qta_giornaliera=r[8],
                        ritenzione=r[9]
                    )
                    lista_oggetti.append(obj)
                return lista_oggetti
        except Exception as e:
            logger.error(f"Esposition.get_presets: {e}")
            return []
        finally:
            conn.close()

    @classmethod
    def get_by_name(cls, preset_name: str):
        """Recupera un preset di esposizione per nome."""
        conn = postgres_connect()
        # BUGFIX: guard against a failed connection (see save_to_postgres).
        if conn is None:
            logger.error(f"Esposition.get_by_name '{preset_name}': connessione PostgreSQL fallita")
            return None
        try:
            with conn.cursor() as cur:
                cur.execute(
                    """SELECT preset_name, tipo_prodotto, luogo_applicazione,
                    esp_normali, esp_secondarie, esp_nano,
                    sup_esposta, freq_applicazione, qta_giornaliera, ritenzione
                    FROM tipi_prodotti WHERE preset_name = %s""",
                    (preset_name,)
                )
                r = cur.fetchone()
                if r:
                    return cls(
                        preset_name=r[0], tipo_prodotto=r[1], luogo_applicazione=r[2],
                        esp_normali=r[3], esp_secondarie=r[4], esp_nano=r[5],
                        sup_esposta=r[6], freq_applicazione=r[7],
                        qta_giornaliera=r[8], ritenzione=r[9]
                    )
                return None
        except Exception as e:
            logger.error(f"Esposition.get_by_name '{preset_name}': {e}")
            return None
        finally:
            conn.close()

    @classmethod
    def delete_by_name(cls, preset_name: str) -> bool:
        """Delete the preset named `preset_name`; True if a row was removed."""
        conn = postgres_connect()
        # BUGFIX: guard against a failed connection (see save_to_postgres).
        if conn is None:
            logger.error(f"Esposition.delete_by_name '{preset_name}': connessione PostgreSQL fallita")
            return False
        try:
            with conn.cursor() as cur:
                cur.execute("DELETE FROM tipi_prodotti WHERE preset_name = %s RETURNING id_preset;", (preset_name,))
                result = cur.fetchone()
                conn.commit()
                return result is not None
        except Exception as e:
            logger.error(f"Esposition.delete_by_name '{preset_name}': {e}")
            conn.rollback()
            return False
        finally:
            conn.close()