scadenza, caching e log
This commit is contained in:
parent
da5e332efa
commit
bba5c11bc9
4 changed files with 75 additions and 333 deletions
|
|
@ -1,246 +0,0 @@
|
|||
"""
|
||||
Script per creare un ordine mock con 4 ingredienti per testare la UI.
|
||||
Inserisce direttamente nei database senza passare dalla pipeline (no scraping).
|
||||
|
||||
Uso: uv run python scripts/create_mock_order.py
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Aggiungi il path del progetto
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
|
||||
|
||||
from pif_compiler.functions.db_utils import (
|
||||
db_connect, upsert_cliente, insert_ordine, aggiorna_stato_ordine,
|
||||
update_ordine_cliente, upsert_ingrediente
|
||||
)
|
||||
from pif_compiler.classes.models import (
|
||||
StatoOrdine, Ingredient, DapInfo, CosingInfo, ToxIndicator, Toxicity, Esposition
|
||||
)
|
||||
from pif_compiler.classes.main_workflow import Project, ProjectIngredient
|
||||
|
||||
|
||||
def ensure_preset_exists(preset_name="Test Preset"):
    """Return the exposure preset named ``preset_name``, creating it if missing.

    The preset is looked up with ``Esposition.get_by_name``. When the lookup
    returns nothing truthy, a new ``Esposition`` with fixed mock values is
    built and stored through ``save_to_postgres``.

    Args:
        preset_name: Name of the preset to look up or create.

    Returns:
        The existing preset, or the newly created ``Esposition`` instance.

    Exits:
        Calls ``sys.exit(1)`` when ``save_to_postgres`` returns a falsy value.
    """
    existing = Esposition.get_by_name(preset_name)
    if existing:
        print(f"Preset '{preset_name}' già esistente")
        return existing

    print(f"Creazione preset '{preset_name}'...")

    # Fixed mock values for the test preset.
    preset_fields = {
        "preset_name": preset_name,
        "tipo_prodotto": "Crema corpo",
        "luogo_applicazione": "Corpo",
        "esp_normali": ["Dermal"],
        "esp_secondarie": ["Oral"],
        "esp_nano": [],
        "sup_esposta": 15670,
        "freq_applicazione": 1,
        "qta_giornaliera": 7.82,
        "ritenzione": 1.0,
    }
    preset = Esposition(**preset_fields)

    result = preset.save_to_postgres()
    if not result:
        # Without a stored preset the rest of the script cannot proceed.
        print("ERRORE: impossibile creare il preset")
        sys.exit(1)

    print(f"Preset creato con id_preset={result}")
    return preset
|
||||
|
||||
|
||||
def create_mock_ingredients():
    """Create and save three mock ingredients with fake toxicology and DAP data.

    The ingredients are GLYCERIN (56-81-5), CETYL ALCOHOL (36653-82-4) and
    TOCOPHEROL (59-02-9). Each one is saved with ``Ingredient.save()`` and a
    confirmation line is printed.

    Returns:
        The tuple ``(glycerin, cetyl, tocopherol)`` of ``Ingredient`` objects.
    """

    def _build(cas, inci_name, dap_values, common_names, functions, ref, indicators):
        # Assemble one Ingredient. Sub-models are created in the same order
        # as the Ingredient keyword arguments: DAP, CosIng, then toxicity.
        molecular_weight, log_pow, tpsa, melting_point = dap_values
        dap = DapInfo(
            cas=cas,
            molecular_weight=molecular_weight,
            log_pow=log_pow,
            tpsa=tpsa,
            melting_point=melting_point,
        )
        cosing = CosingInfo(
            cas=[cas],
            common_names=common_names,
            inci=[inci_name],
            annex=[],
            functionName=functions,
            otherRestrictions=[],
            cosmeticRestriction=None,
        )
        tox = Toxicity(
            cas=cas,
            indicators=[
                ToxIndicator(
                    indicator=name, value=value, unit=unit,
                    route="oral", toxicity_type=tox_type,
                    ref=ref,
                )
                for name, value, unit, tox_type in indicators
            ],
        )
        return Ingredient(
            cas=cas,
            inci=[inci_name],
            dap_info=dap,
            cosing_info=[cosing],
            toxicity=tox,
        )

    # GLYCERIN (56-81-5): has a NOAEL (plus an LD50)
    glycerin = _build(
        "56-81-5",
        "GLYCERIN",
        (92.09, -1.76, 60.69, 18.0),
        ["Glycerol"],
        ["Humectant", "Solvent", "Skin conditioning"],
        "https://chem.echa.europa.eu/100.003.264",
        [
            ("NOAEL", 1000, "mg/kg bw/day", "repeated_dose_toxicity"),
            ("LD50", 12600, "mg/kg bw", "acute_toxicity"),
        ],
    )

    # CETYL ALCOHOL (36653-82-4): has a NOAEL
    cetyl = _build(
        "36653-82-4",
        "CETYL ALCOHOL",
        (242.44, 6.83, 20.23, 49.0),
        ["Cetyl alcohol", "1-Hexadecanol"],
        ["Emollient", "Emulsifying", "Opacifying"],
        "https://chem.echa.europa.eu/100.004.098",
        [
            ("NOAEL", 1000, "mg/kg bw/day", "repeated_dose_toxicity"),
        ],
    )

    # TOCOPHEROL (59-02-9): has only a LOAEL
    tocopherol = _build(
        "59-02-9",
        "TOCOPHEROL",
        (430.71, 10.51, 29.46, 3.0),
        ["alpha-Tocopherol"],
        ["Antioxidant", "Skin conditioning"],
        "https://chem.echa.europa.eu/100.000.375",
        [
            ("LOAEL", 500, "mg/kg bw/day", "repeated_dose_toxicity"),
        ],
    )

    # Save every ingredient (the original note says save() writes to
    # MongoDB + PostgreSQL; not verifiable from this script alone).
    mock_ingredients = (glycerin, cetyl, tocopherol)
    for ing in mock_ingredients:
        mongo_id = ing.save()
        print(f"Ingrediente {ing.cas} ({ing.inci[0]}) salvato (mongo_id={mongo_id})")

    return mock_ingredients
|
||||
|
||||
|
||||
def create_mock_order(preset, glycerin, cetyl, tocopherol):
    """Create a complete mock order and return its ``id_ordine``.

    Steps, in order: upsert the client, insert the raw order document into
    the ``orders`` collection, insert the order row via ``insert_ordine``,
    set its state to ``StatoOrdine.ARRICCHITO``, build a ``Project`` with the
    four ingredients and save it, then print a summary.

    Args:
        preset: Exposure preset used for the order and the project.
        glycerin: Enriched ``Ingredient`` for GLYCERIN.
        cetyl: Enriched ``Ingredient`` for CETYL ALCOHOL.
        tocopherol: Enriched ``Ingredient`` for TOCOPHEROL.

    Returns:
        The value returned by ``insert_ordine`` (printed as ``id_ordine``).
    """
    client_name = "Cosmetica Test Srl"
    product_name = "Crema Idratante Test"

    # 1. Upsert the client
    id_cliente = upsert_cliente(client_name)
    print(f"Cliente '{client_name}' → id_cliente={id_cliente}")

    # 2. Raw order JSON: (inci, cas, percentage, skip_tox) per ingredient
    raw_rows = (
        ("AQUA", "", 70.0, True),
        ("GLYCERIN", "56-81-5", 15.0, False),
        ("CETYL ALCOHOL", "36653-82-4", 10.0, False),
        ("TOCOPHEROL", "59-02-9", 5.0, False),
    )
    raw_json = {
        "client_name": client_name,
        "product_name": product_name,
        "preset_esposizione": preset.preset_name,
        "ingredients": [
            {
                "inci": inci,
                "cas": cas,
                "percentage": percentage,
                "is_colorante": False,
                "skip_tox": skip_tox,
            }
            for inci, cas, percentage, skip_tox in raw_rows
        ],
    }

    # 3. Store the raw order in the MongoDB "orders" collection; a shallow
    #    copy is inserted so raw_json itself is not the object handed over.
    orders_col = db_connect(collection_name='orders')
    inserted = orders_col.insert_one(dict(raw_json))
    uuid_ordine = str(inserted.inserted_id)
    print(f"Ordine salvato su MongoDB: uuid_ordine={uuid_ordine}")

    # 4. Insert the order row in PostgreSQL
    id_ordine = insert_ordine(uuid_ordine, id_cliente)
    print(f"Ordine inserito in PostgreSQL: id_ordine={id_ordine}")

    # 5. Link the client and move the order state to ARRICCHITO
    update_ordine_cliente(id_ordine, id_cliente)
    aggiorna_stato_ordine(id_ordine, int(StatoOrdine.ARRICCHITO))
    print(f"Stato ordine aggiornato a ARRICCHITO ({StatoOrdine.ARRICCHITO})")

    # 6. Build the project with the enriched ingredients
    project_ingredients = [
        ProjectIngredient(cas=None, inci="AQUA", percentage=70.0, skip_tox=True),
        ProjectIngredient(cas="56-81-5", inci="GLYCERIN", percentage=15.0, ingredient=glycerin),
        ProjectIngredient(cas="36653-82-4", inci="CETYL ALCOHOL", percentage=10.0, ingredient=cetyl),
        ProjectIngredient(cas="59-02-9", inci="TOCOPHEROL", percentage=5.0, ingredient=tocopherol),
    ]
    project = Project(
        order_id=id_ordine,
        product_name=product_name,
        client_name=client_name,
        esposition=preset,
        ingredients=project_ingredients,
    )

    # 7. Save the project
    uuid_progetto = project.save()
    print(f"Progetto salvato: uuid_progetto={uuid_progetto}")

    # Final summary, one print per line as in the original output.
    separator = "=" * 60
    summary_lines = [
        "\n" + separator,
        "MOCK ORDER CREATO CON SUCCESSO",
        separator,
        f" id_ordine: {id_ordine}",
        f" uuid_ordine: {uuid_ordine}",
        f" uuid_progetto: {uuid_progetto}",
        f" cliente: {client_name}",
        f" prodotto: Crema Idratante Test",
        f" preset: {preset.preset_name}",
        f" ingredienti: 4 (AQUA, GLYCERIN, CETYL ALCOHOL, TOCOPHEROL)",
        f" stato: ARRICCHITO ({StatoOrdine.ARRICCHITO})",
        separator,
    ]
    for line in summary_lines:
        print(line)

    return id_ordine
|
||||
|
||||
|
||||
if __name__ == "__main__":
    print("Creazione ordine mock...")
    print()

    # 1. Make sure the exposure preset exists
    preset = ensure_preset_exists()

    # 2. Create the mock ingredients (returned as glycerin, cetyl, tocopherol)
    mock_ingredients = create_mock_ingredients()

    # 3. Create the order from the preset and the three ingredients
    create_mock_order(preset, *mock_ingredients)
|
||||
|
|
@ -127,6 +127,7 @@ class CosingInfo(BaseModel):
|
|||
otherRestrictions : List[str] = Field(default_factory=list)
|
||||
cosmeticRestriction : Optional[str] = None
|
||||
reference : Optional[str] = None
|
||||
substanceId : Optional[str] = None
|
||||
sccsOpinionUrls : List[str] = Field(default_factory=list)
|
||||
|
||||
@classmethod
|
||||
|
|
@ -140,6 +141,7 @@ class CosingInfo(BaseModel):
|
|||
'otherRestrictions',
|
||||
'cosmeticRestriction',
|
||||
'reference',
|
||||
'substanceId',
|
||||
'inciName',
|
||||
'sccsOpinionUrls'
|
||||
]
|
||||
|
|
@ -185,6 +187,8 @@ class CosingInfo(BaseModel):
|
|||
cosing_dict['cosmeticRestriction'] = cosing_data[k]
|
||||
if k == 'reference':
|
||||
cosing_dict['reference'] = cosing_data[k]
|
||||
if k == 'substanceId':
|
||||
cosing_dict['substanceId'] = cosing_data[k]
|
||||
if k == 'sccsOpinionUrls':
|
||||
urls = []
|
||||
for url in cosing_data[k]:
|
||||
|
|
@ -213,6 +217,7 @@ class ToxIndicator(BaseModel):
|
|||
toxicity_type : Optional[str] = None
|
||||
ref : Optional[str] = None
|
||||
source : Optional[str] = None
|
||||
is_custom : bool = False
|
||||
|
||||
@property
|
||||
def priority_rank(self):
|
||||
|
|
@ -392,7 +397,10 @@ class Ingredient(BaseModel):
|
|||
@classmethod
|
||||
def get_or_create(cls, cas: str, inci: Optional[List[str]] = None, force: bool = False):
|
||||
"""Restituisce l'ingrediente dalla cache se esiste e non è vecchio, altrimenti lo ricrea.
|
||||
Se force=True, ignora la cache e riesegue lo scraping aggiornando il documento."""
|
||||
Se force=True, ignora la cache e riesegue lo scraping aggiornando il documento.
|
||||
Al re-scraping, i campi che risultano None vengono sostituiti con il valore cached
|
||||
per evitare regressioni di dati in caso di fallimenti temporanei delle fonti esterne."""
|
||||
cached = None
|
||||
if not force:
|
||||
cached = cls.from_cas(cas)
|
||||
if cached and not cached.is_old():
|
||||
|
|
@ -405,6 +413,26 @@ class Ingredient(BaseModel):
|
|||
logger.info(f"get_or_create CAS={cas}: force refresh")
|
||||
|
||||
ingredient = cls.ingredient_builder(cas, inci=inci)
|
||||
|
||||
if cached:
|
||||
if ingredient.dap_info is None and cached.dap_info is not None:
|
||||
logger.warning(f"get_or_create CAS={cas}: dap_info non ottenuto, mantengo dati cached")
|
||||
ingredient.dap_info = cached.dap_info
|
||||
if ingredient.cosing_info is None and cached.cosing_info is not None:
|
||||
logger.warning(f"get_or_create CAS={cas}: cosing_info non ottenuto, mantengo dati cached")
|
||||
ingredient.cosing_info = cached.cosing_info
|
||||
if ingredient.toxicity is None and cached.toxicity is not None:
|
||||
logger.warning(f"get_or_create CAS={cas}: toxicity non ottenuta, mantengo dati cached")
|
||||
ingredient.toxicity = cached.toxicity
|
||||
elif ingredient.toxicity is not None and cached.toxicity is not None:
|
||||
custom_indicators = [i for i in cached.toxicity.indicators if i.is_custom]
|
||||
if custom_indicators:
|
||||
logger.info(f"get_or_create CAS={cas}: preservo {len(custom_indicators)} indicatori custom nel re-scraping")
|
||||
ingredient.toxicity = Toxicity(
|
||||
cas=ingredient.toxicity.cas,
|
||||
indicators=ingredient.toxicity.indicators + custom_indicators
|
||||
)
|
||||
|
||||
ingredient.save()
|
||||
return ingredient
|
||||
|
||||
|
|
@ -452,6 +480,7 @@ class Ingredient(BaseModel):
|
|||
|
||||
def add_tox_indicator(self, indicator: ToxIndicator):
|
||||
"""Aggiunge un indicatore tossicologico custom e ricalcola il best_case."""
|
||||
indicator.is_custom = True
|
||||
if self.toxicity is None:
|
||||
self.toxicity = Toxicity(cas=self.cas, indicators=[indicator])
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -120,7 +120,8 @@ async def generate_project_source_pdfs(project, output_dir: str = "pdfs") -> lis
|
|||
# --- Tox best_case PDF ---
|
||||
best = ing.toxicity.best_case if ing.toxicity else None
|
||||
if best and best.ref:
|
||||
pdf_name = f"{pi.cas}_{best.source}" if best.source else pi.cas
|
||||
source_label = best.source or best.toxicity_type or "tox"
|
||||
pdf_name = f"{pi.cas}_{source_label}"
|
||||
log.info(f"Generazione PDF tox: {pdf_name} da {best.ref}")
|
||||
success = await generate_pdf(best.ref, pdf_name)
|
||||
if success:
|
||||
|
|
@ -143,7 +144,7 @@ async def generate_project_source_pdfs(project, output_dir: str = "pdfs") -> lis
|
|||
generated.append(pdf_path)
|
||||
continue
|
||||
|
||||
log.info(f"Download COSING PDF: {pdf_name} (ref={cosing.reference})")
|
||||
log.info(f"Download COSING PDF: {pdf_name} (reference={cosing.reference})")
|
||||
content = cosing_download(cosing.reference)
|
||||
if isinstance(content, bytes):
|
||||
with open(pdf_path, 'wb') as f:
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ from playwright.sync_api import sync_playwright
|
|||
from typing import Callable, Any
|
||||
|
||||
from pif_compiler.functions.common_log import get_logger
|
||||
from pif_compiler.functions.db_utils import db_connect, log_ricerche
|
||||
from pif_compiler.functions.db_utils import log_ricerche
|
||||
|
||||
log = get_logger()
|
||||
load_dotenv()
|
||||
|
|
@ -30,12 +30,12 @@ legislation = "&legislation=REACH"
|
|||
def search_substance(cas : str) -> dict:
|
||||
response = requests.get(BASE_SEARCH + cas)
|
||||
if response.status_code != 200:
|
||||
log.error(f"Network error: {response.status_code}")
|
||||
log.error(f"search_substance CAS={cas}: HTTP {response.status_code}")
|
||||
return {}
|
||||
else:
|
||||
response = response.json()
|
||||
if response['state']['totalItems'] == 0:
|
||||
log.info(f"No substance found for CAS {cas}")
|
||||
log.warning(f"search_substance CAS={cas}: nessuna sostanza trovata su ECHA")
|
||||
return {}
|
||||
else:
|
||||
for result in response['items']:
|
||||
|
|
@ -47,9 +47,9 @@ def search_substance(cas : str) -> dict:
|
|||
"rmlName": result["substanceIndex"]["rmlName"],
|
||||
"rmlId": result["substanceIndex"]["rmlId"]
|
||||
}
|
||||
log.info(f"Substance found for CAS {cas}: {substance['rmlName']}")
|
||||
log.debug(f"search_substance CAS={cas}: trovata '{substance['rmlName']}'")
|
||||
return substance
|
||||
log.error(f"Something went wrong searching the substance for CAS {cas}")
|
||||
log.warning(f"search_substance CAS={cas}: {response['state']['totalItems']} risultati ma nessun match esatto sul CAS")
|
||||
return {}
|
||||
|
||||
|
||||
|
|
@ -57,14 +57,16 @@ def get_dossier_info(rmlId: str, type = active) -> dict:
|
|||
url = BASE_DOSSIER + rmlId + type + legislation
|
||||
response_dossier = requests.get(url)
|
||||
if response_dossier.status_code != 200:
|
||||
log.error(f"Network error: {response_dossier.status_code}")
|
||||
log.error(f"get_dossier_info rmlId={rmlId}: HTTP {response_dossier.status_code}")
|
||||
return {}
|
||||
response_dossier_json = response_dossier.json()
|
||||
if response_dossier_json['state']['totalItems'] == 0:
|
||||
log.info(f"No dossier found for RML ID {rmlId}")
|
||||
if type == active:
|
||||
log.debug(f"get_dossier_info rmlId={rmlId}: nessun dossier attivo, provo inattivi")
|
||||
return get_dossier_info(rmlId, inactive)
|
||||
log.warning(f"get_dossier_info rmlId={rmlId}: nessun dossier trovato (né attivo né inattivo)")
|
||||
return {}
|
||||
dossier_info = {}
|
||||
for dossier in response_dossier_json['items']:
|
||||
if dossier['reachDossierInfo']['dossierSubtype'] == "Article 10 - full" and dossier['reachDossierInfo']['registrationRole'] == "Lead (joint submission)":
|
||||
dossier_info = {
|
||||
|
|
@ -75,7 +77,8 @@ def get_dossier_info(rmlId: str, type = active) -> dict:
|
|||
"assetExternalId": dossier['assetExternalId'],
|
||||
"rootKey": dossier['rootKey']
|
||||
}
|
||||
log.info(f"Dossier info retrieved for RML ID {rmlId}")
|
||||
if not dossier_info:
|
||||
log.warning(f"get_dossier_info rmlId={rmlId}: nessun dossier 'Article 10 - full / Lead' tra i {response_dossier_json['state']['totalItems']} trovati")
|
||||
return dossier_info
|
||||
|
||||
|
||||
|
|
@ -85,7 +88,7 @@ def get_substance_index(assetExternalId : str) -> dict:
|
|||
|
||||
response = requests.get(INDEX + "/index.html")
|
||||
if response.status_code != 200:
|
||||
log.error(f"Network error: {response.status_code}")
|
||||
log.error(f"get_substance_index {assetExternalId}: HTTP {response.status_code}")
|
||||
return {}
|
||||
|
||||
soup = BeautifulSoup(response.content, 'html.parser')
|
||||
|
|
@ -98,7 +101,7 @@ def get_substance_index(assetExternalId : str) -> dict:
|
|||
txi_href = txi_link['href']
|
||||
index_data['toxicological_information_link'] = LINK_DOSSIER + txi_href + '.html'
|
||||
except Exception as e:
|
||||
log.error(f"Error retrieving toxicological information link: {e}")
|
||||
log.warning(f"get_substance_index: link tossicologia non trovato — {e}")
|
||||
index_data['toxicological_information_link'] = None
|
||||
|
||||
# Repeated dose toxicity : rdt
|
||||
|
|
@ -108,7 +111,7 @@ def get_substance_index(assetExternalId : str) -> dict:
|
|||
rdt_href = rdt_link['href']
|
||||
index_data['repeated_dose_toxicity_link'] = LINK_DOSSIER + rdt_href + '.html'
|
||||
except Exception as e:
|
||||
log.error(f"Error retrieving repeated dose toxicity link: {e}")
|
||||
log.warning(f"get_substance_index: link repeated dose non trovato — {e}")
|
||||
index_data['repeated_dose_toxicity_link'] = None
|
||||
|
||||
# Acute toxicity : at
|
||||
|
|
@ -118,11 +121,9 @@ def get_substance_index(assetExternalId : str) -> dict:
|
|||
at_href = at_link['href']
|
||||
index_data['acute_toxicity_link'] = LINK_DOSSIER + at_href + '.html'
|
||||
except Exception as e:
|
||||
log.error(f"Error retrieving acute toxicity link: {e}")
|
||||
log.warning(f"get_substance_index: link acute toxicity non trovato — {e}")
|
||||
index_data['acute_toxicity_link'] = None
|
||||
|
||||
log.info(f"Substance index retrieved for Asset External ID {assetExternalId}")
|
||||
|
||||
return index_data
|
||||
|
||||
|
||||
|
|
@ -429,8 +430,8 @@ def echa_flow(cas) -> dict:
|
|||
substance = search_substance(cas)
|
||||
dossier_info = get_dossier_info(substance['rmlId'])
|
||||
index = get_substance_index(dossier_info['assetExternalId'])
|
||||
except Exception as e:
|
||||
log.error(f"Error in ECHA flow for CAS {cas}: {e}")
|
||||
except KeyError as e:
|
||||
log.error(f"echa_flow CAS={cas}: chiave mancante nella risposta ECHA — {e}")
|
||||
return {}
|
||||
|
||||
result = {
|
||||
|
|
@ -442,14 +443,14 @@ def echa_flow(cas) -> dict:
|
|||
"repeated_dose_toxicity": {}
|
||||
}
|
||||
|
||||
log.debug(f"ECHA flow intermediate result")
|
||||
|
||||
# Fetch and parse toxicological information
|
||||
txi_link = index.get('toxicological_information_link')
|
||||
if txi_link:
|
||||
response_summary = requests.get(txi_link)
|
||||
if response_summary.status_code == 200:
|
||||
result['toxicological_information'] = parse_toxicology_html(response_summary.content)
|
||||
else:
|
||||
log.warning(f"echa_flow CAS={cas}: tossicologia HTTP {response_summary.status_code}")
|
||||
|
||||
# Fetch and parse acute toxicity
|
||||
at_link = index.get('acute_toxicity_link')
|
||||
|
|
@ -457,6 +458,8 @@ def echa_flow(cas) -> dict:
|
|||
response_acute = requests.get(at_link)
|
||||
if response_acute.status_code == 200:
|
||||
result['acute_toxicity'] = parse_toxicology_html(response_acute.content)
|
||||
else:
|
||||
log.warning(f"echa_flow CAS={cas}: acute toxicity HTTP {response_acute.status_code}")
|
||||
|
||||
# Fetch and parse repeated dose toxicity
|
||||
rdt_link = index.get('repeated_dose_toxicity_link')
|
||||
|
|
@ -464,86 +467,41 @@ def echa_flow(cas) -> dict:
|
|||
response_repeated = requests.get(rdt_link)
|
||||
if response_repeated.status_code == 200:
|
||||
result['repeated_dose_toxicity'] = parse_toxicology_html(response_repeated.content)
|
||||
|
||||
for key, value in result.items():
|
||||
if value is None or value == "" or value == [] or value == {}:
|
||||
log.warning(f"Missing data for key: {key} in CAS {cas}")
|
||||
else:
|
||||
log.info(f"Data retrieved for key: {key} in CAS {cas}")
|
||||
log.warning(f"echa_flow CAS={cas}: repeated dose HTTP {response_repeated.status_code}")
|
||||
|
||||
txi_ok = bool(result['toxicological_information'])
|
||||
at_ok = bool(result['acute_toxicity'])
|
||||
rdt_ok = bool(result['repeated_dose_toxicity'])
|
||||
log.info(f"echa_flow CAS={cas}: txi={'OK' if txi_ok else '-'}, acute={'OK' if at_ok else '-'}, rdt={'OK' if rdt_ok else '-'}")
|
||||
|
||||
return result
|
||||
|
||||
def cas_validation(cas: str) -> str:
|
||||
log.info(f"Starting ECHA data extraction for CAS: {cas}")
|
||||
if cas is None or cas.strip() == "":
|
||||
log.error("No CAS number provided.")
|
||||
log.error("cas_validation: CAS vuoto o None")
|
||||
return None
|
||||
|
||||
cas_stripped = cas.replace("-", "")
|
||||
if cas_stripped.isdigit() and len(cas_stripped) <= 12:
|
||||
log.info(f"CAS number {cas} maybe is valid.")
|
||||
return cas.strip()
|
||||
else:
|
||||
log.error(f"CAS number {cas} is not valid.")
|
||||
return None
|
||||
|
||||
def check_local(cas: str) -> bool:
|
||||
collection = db_connect()
|
||||
|
||||
if collection is None:
|
||||
log.error("No MongoDB collection available.")
|
||||
return None
|
||||
|
||||
record = collection.find_one({"substance.rmlCas": cas})
|
||||
|
||||
if record:
|
||||
log.info(f"Record for CAS {cas} found in local database.")
|
||||
return record
|
||||
else:
|
||||
log.info(f"No record for CAS {cas} found in local database.")
|
||||
return None
|
||||
|
||||
def add_to_local(data: dict) -> bool:
|
||||
collection = db_connect()
|
||||
|
||||
if collection is None:
|
||||
log.error("No MongoDB collection available.")
|
||||
return False
|
||||
|
||||
try:
|
||||
collection.insert_one(data)
|
||||
log.info(f"Data for CAS {data['substance']['rmlCas']} added to local database.")
|
||||
return True
|
||||
except Exception as e:
|
||||
log.error(f"Error inserting data into MongoDB: {e}")
|
||||
return False
|
||||
log.error(f"cas_validation: CAS '{cas}' non valido (formato non riconosciuto)")
|
||||
return None
|
||||
|
||||
def orchestrator(cas: str) -> dict:
|
||||
log.debug(f"Initiating search for CAS {cas} in ECHA service.")
|
||||
log.debug(f"ECHA orchestrator CAS={cas}")
|
||||
cas_validated = cas_validation(cas)
|
||||
if not cas_validated:
|
||||
return None
|
||||
else:
|
||||
log.info(f"CAS {cas} validated successfully.")
|
||||
local_record = check_local(cas_validated)
|
||||
if local_record:
|
||||
log.info(f"Returning local record for CAS {cas}.")
|
||||
log_ricerche(cas, 'ECHA', True)
|
||||
return local_record
|
||||
else:
|
||||
log.info(f"No local record, starting echa flow")
|
||||
echa_data = echa_flow(cas_validated)
|
||||
if echa_data:
|
||||
log.info(f"Echa flow successful")
|
||||
log_ricerche(cas, 'ECHA', True)
|
||||
add_to_local(echa_data)
|
||||
return echa_data
|
||||
else:
|
||||
log.error(f"Failed to retrieve ECHA data for CAS {cas}.")
|
||||
log_ricerche(cas, 'ECHA', False)
|
||||
return None
|
||||
|
||||
# to do: check if document is complete
|
||||
# to do: check lastupdate
|
||||
echa_data = echa_flow(cas_validated)
|
||||
if echa_data:
|
||||
log.info(f"ECHA CAS={cas}: completato")
|
||||
log_ricerche(cas, 'ECHA', True)
|
||||
return echa_data
|
||||
else:
|
||||
log.error(f"ECHA CAS={cas}: nessun dato recuperato")
|
||||
log_ricerche(cas, 'ECHA', False)
|
||||
return None
|
||||
#endregion
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
Loading…
Reference in a new issue