scadenza, caching e log

This commit is contained in:
adish-rmr 2026-02-26 16:44:45 +01:00
parent da5e332efa
commit bba5c11bc9
4 changed files with 75 additions and 333 deletions

View file

@ -1,246 +0,0 @@
"""
Script per creare un ordine mock con 4 ingredienti per testare la UI.
Inserisce direttamente nei database senza passare dalla pipeline (no scraping).
Uso: uv run python scripts/create_mock_order.py
"""
import sys
import os
# Aggiungi il path del progetto
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
from pif_compiler.functions.db_utils import (
db_connect, upsert_cliente, insert_ordine, aggiorna_stato_ordine,
update_ordine_cliente, upsert_ingrediente
)
from pif_compiler.classes.models import (
StatoOrdine, Ingredient, DapInfo, CosingInfo, ToxIndicator, Toxicity, Esposition
)
from pif_compiler.classes.main_workflow import Project, ProjectIngredient
def ensure_preset_exists(preset_name="Test Preset"):
    """Return the exposure preset with the given name, creating it if missing.

    Looks the preset up by name first; only when no record exists does it
    build a new Esposition with fixed test values and persist it to Postgres.
    Exits the process with status 1 if the save fails.
    """
    existing = Esposition.get_by_name(preset_name)
    if existing:
        print(f"Preset '{preset_name}' già esistente")
        return existing

    print(f"Creazione preset '{preset_name}'...")
    # Fixed, realistic-looking exposure parameters for a body cream.
    new_preset = Esposition(
        preset_name=preset_name,
        tipo_prodotto="Crema corpo",
        luogo_applicazione="Corpo",
        esp_normali=["Dermal"],
        esp_secondarie=["Oral"],
        esp_nano=[],
        sup_esposta=15670,
        freq_applicazione=1,
        qta_giornaliera=7.82,
        ritenzione=1.0
    )
    new_id = new_preset.save_to_postgres()
    if not new_id:
        # Without a preset the rest of the mock pipeline cannot run.
        print("ERRORE: impossibile creare il preset")
        sys.exit(1)
    print(f"Preset creato con id_preset={new_id}")
    return new_preset
def _mock_ingredient(cas, inci_names, dap_values, common_names, functions, indicators):
    """Assemble one mock Ingredient with fake DAP, COSING and toxicity data.

    Args:
        cas: CAS number, reused for the DAP, COSING and toxicity sub-records.
        inci_names: list of INCI names (also used as the COSING inci field).
        dap_values: dict of DapInfo numeric fields (molecular_weight, log_pow,
            tpsa, melting_point).
        common_names: COSING common names list.
        functions: COSING functionName list.
        indicators: list of ToxIndicator instances for the Toxicity record.
    """
    return Ingredient(
        cas=cas,
        inci=inci_names,
        dap_info=DapInfo(cas=cas, **dap_values),
        cosing_info=[CosingInfo(
            cas=[cas],
            common_names=common_names,
            inci=inci_names,
            annex=[],
            functionName=functions,
            otherRestrictions=[],
            cosmeticRestriction=None
        )],
        toxicity=Toxicity(cas=cas, indicators=indicators)
    )


def create_mock_ingredients():
    """Create three mock ingredients with fake toxicology and DAP data.

    Each ingredient is saved to MongoDB + PostgreSQL via Ingredient.save().

    Returns:
        Tuple (glycerin, cetyl, tocopherol) of saved Ingredient instances.
    """
    # GLYCERIN (56-81-5) — has a NOAEL plus an acute LD50
    glycerin = _mock_ingredient(
        "56-81-5", ["GLYCERIN"],
        dict(molecular_weight=92.09, log_pow=-1.76, tpsa=60.69, melting_point=18.0),
        ["Glycerol"],
        ["Humectant", "Solvent", "Skin conditioning"],
        [
            ToxIndicator(
                indicator="NOAEL", value=1000, unit="mg/kg bw/day",
                route="oral", toxicity_type="repeated_dose_toxicity",
                ref="https://chem.echa.europa.eu/100.003.264"
            ),
            ToxIndicator(
                indicator="LD50", value=12600, unit="mg/kg bw",
                route="oral", toxicity_type="acute_toxicity",
                ref="https://chem.echa.europa.eu/100.003.264"
            ),
        ],
    )
    # CETYL ALCOHOL (36653-82-4) — has a NOAEL only
    cetyl = _mock_ingredient(
        "36653-82-4", ["CETYL ALCOHOL"],
        dict(molecular_weight=242.44, log_pow=6.83, tpsa=20.23, melting_point=49.0),
        ["Cetyl alcohol", "1-Hexadecanol"],
        ["Emollient", "Emulsifying", "Opacifying"],
        [
            ToxIndicator(
                indicator="NOAEL", value=1000, unit="mg/kg bw/day",
                route="oral", toxicity_type="repeated_dose_toxicity",
                ref="https://chem.echa.europa.eu/100.004.098"
            ),
        ],
    )
    # TOCOPHEROL (59-02-9) — has a LOAEL (exercises the non-NOAEL code path)
    tocopherol = _mock_ingredient(
        "59-02-9", ["TOCOPHEROL"],
        dict(molecular_weight=430.71, log_pow=10.51, tpsa=29.46, melting_point=3.0),
        ["alpha-Tocopherol"],
        ["Antioxidant", "Skin conditioning"],
        [
            ToxIndicator(
                indicator="LOAEL", value=500, unit="mg/kg bw/day",
                route="oral", toxicity_type="repeated_dose_toxicity",
                ref="https://chem.echa.europa.eu/100.000.375"
            ),
        ],
    )
    # Persist every ingredient (MongoDB + PostgreSQL)
    for ing in [glycerin, cetyl, tocopherol]:
        mongo_id = ing.save()
        print(f"Ingrediente {ing.cas} ({ing.inci[0]}) salvato (mongo_id={mongo_id})")
    return glycerin, cetyl, tocopherol
def create_mock_order(preset, glycerin, cetyl, tocopherol):
    """Create a complete mock order end-to-end, bypassing the scraping pipeline.

    Steps: upsert the client, store the raw order JSON in MongoDB, register
    the order in PostgreSQL, advance its state to ARRICCHITO, then build and
    save the enriched Project with the supplied ingredients.

    Args:
        preset: Esposition preset used for the project.
        glycerin, cetyl, tocopherol: pre-saved mock Ingredient instances.

    Returns:
        The PostgreSQL id_ordine of the created order.
    """
    # 1. Upsert the client
    client_name = "Cosmetica Test Srl"
    id_cliente = upsert_cliente(client_name)
    print(f"Cliente '{client_name}' → id_cliente={id_cliente}")
    # 2. Raw order JSON as the intake pipeline would produce it
    raw_json = {
        "client_name": client_name,
        "product_name": "Crema Idratante Test",
        "preset_esposizione": preset.preset_name,
        "ingredients": [
            {"inci": "AQUA", "cas": "", "percentage": 70.0, "is_colorante": False, "skip_tox": True},
            {"inci": "GLYCERIN", "cas": "56-81-5", "percentage": 15.0, "is_colorante": False, "skip_tox": False},
            {"inci": "CETYL ALCOHOL", "cas": "36653-82-4", "percentage": 10.0, "is_colorante": False, "skip_tox": False},
            {"inci": "TOCOPHEROL", "cas": "59-02-9", "percentage": 5.0, "is_colorante": False, "skip_tox": False},
        ]
    }
    # 3. Store in the MongoDB 'orders' collection.
    # insert_one mutates its argument (adds _id), so pass a copy to keep
    # raw_json clean for any later use.
    orders_col = db_connect(collection_name='orders')
    result = orders_col.insert_one(raw_json.copy())
    uuid_ordine = str(result.inserted_id)
    print(f"Ordine salvato su MongoDB: uuid_ordine={uuid_ordine}")
    # 4. Register in the PostgreSQL 'ordini' table
    id_ordine = insert_ordine(uuid_ordine, id_cliente)
    print(f"Ordine inserito in PostgreSQL: id_ordine={id_ordine}")
    # 5. Advance the order state to ARRICCHITO (enriched)
    update_ordine_cliente(id_ordine, id_cliente)
    aggiorna_stato_ordine(id_ordine, int(StatoOrdine.ARRICCHITO))
    print(f"Stato ordine aggiornato a ARRICCHITO ({StatoOrdine.ARRICCHITO})")
    # 6. Build the project with the enriched ingredients.
    # AQUA has no CAS and skips toxicology, mirroring the real pipeline.
    project = Project(
        order_id=id_ordine,
        product_name="Crema Idratante Test",
        client_name=client_name,
        esposition=preset,
        ingredients=[
            ProjectIngredient(cas=None, inci="AQUA", percentage=70.0, skip_tox=True),
            ProjectIngredient(cas="56-81-5", inci="GLYCERIN", percentage=15.0, ingredient=glycerin),
            ProjectIngredient(cas="36653-82-4", inci="CETYL ALCOHOL", percentage=10.0, ingredient=cetyl),
            ProjectIngredient(cas="59-02-9", inci="TOCOPHEROL", percentage=5.0, ingredient=tocopherol),
        ]
    )
    # 7. Persist the project (MongoDB + PostgreSQL)
    uuid_progetto = project.save()
    print(f"Progetto salvato: uuid_progetto={uuid_progetto}")
    print("\n" + "=" * 60)
    print("MOCK ORDER CREATO CON SUCCESSO")
    print("=" * 60)
    print(f" id_ordine: {id_ordine}")
    print(f" uuid_ordine: {uuid_ordine}")
    print(f" uuid_progetto: {uuid_progetto}")
    print(f" cliente: {client_name}")
    # f-prefix removed on the two constant lines below: no placeholders.
    print(" prodotto: Crema Idratante Test")
    print(f" preset: {preset.preset_name}")
    print(" ingredienti: 4 (AQUA, GLYCERIN, CETYL ALCOHOL, TOCOPHEROL)")
    print(f" stato: ARRICCHITO ({StatoOrdine.ARRICCHITO})")
    print("=" * 60)
    return id_ordine
if __name__ == "__main__":
    print("Creazione ordine mock...")
    print()
    # Make sure the exposure preset exists, build the mock ingredients,
    # then assemble the full mock order from them.
    exposure_preset = ensure_preset_exists()
    mock_ingredients = create_mock_ingredients()
    create_mock_order(exposure_preset, *mock_ingredients)

View file

@ -127,6 +127,7 @@ class CosingInfo(BaseModel):
otherRestrictions : List[str] = Field(default_factory=list) otherRestrictions : List[str] = Field(default_factory=list)
cosmeticRestriction : Optional[str] = None cosmeticRestriction : Optional[str] = None
reference : Optional[str] = None reference : Optional[str] = None
substanceId : Optional[str] = None
sccsOpinionUrls : List[str] = Field(default_factory=list) sccsOpinionUrls : List[str] = Field(default_factory=list)
@classmethod @classmethod
@ -140,6 +141,7 @@ class CosingInfo(BaseModel):
'otherRestrictions', 'otherRestrictions',
'cosmeticRestriction', 'cosmeticRestriction',
'reference', 'reference',
'substanceId',
'inciName', 'inciName',
'sccsOpinionUrls' 'sccsOpinionUrls'
] ]
@ -185,6 +187,8 @@ class CosingInfo(BaseModel):
cosing_dict['cosmeticRestriction'] = cosing_data[k] cosing_dict['cosmeticRestriction'] = cosing_data[k]
if k == 'reference': if k == 'reference':
cosing_dict['reference'] = cosing_data[k] cosing_dict['reference'] = cosing_data[k]
if k == 'substanceId':
cosing_dict['substanceId'] = cosing_data[k]
if k == 'sccsOpinionUrls': if k == 'sccsOpinionUrls':
urls = [] urls = []
for url in cosing_data[k]: for url in cosing_data[k]:
@ -213,6 +217,7 @@ class ToxIndicator(BaseModel):
toxicity_type : Optional[str] = None toxicity_type : Optional[str] = None
ref : Optional[str] = None ref : Optional[str] = None
source : Optional[str] = None source : Optional[str] = None
is_custom : bool = False
@property @property
def priority_rank(self): def priority_rank(self):
@ -392,7 +397,10 @@ class Ingredient(BaseModel):
@classmethod @classmethod
def get_or_create(cls, cas: str, inci: Optional[List[str]] = None, force: bool = False): def get_or_create(cls, cas: str, inci: Optional[List[str]] = None, force: bool = False):
"""Restituisce l'ingrediente dalla cache se esiste e non è vecchio, altrimenti lo ricrea. """Restituisce l'ingrediente dalla cache se esiste e non è vecchio, altrimenti lo ricrea.
Se force=True, ignora la cache e riesegue lo scraping aggiornando il documento.""" Se force=True, ignora la cache e riesegue lo scraping aggiornando il documento.
Al re-scraping, i campi che risultano None vengono sostituiti con il valore cached
per evitare regressioni di dati in caso di fallimenti temporanei delle fonti esterne."""
cached = None
if not force: if not force:
cached = cls.from_cas(cas) cached = cls.from_cas(cas)
if cached and not cached.is_old(): if cached and not cached.is_old():
@ -405,6 +413,26 @@ class Ingredient(BaseModel):
logger.info(f"get_or_create CAS={cas}: force refresh") logger.info(f"get_or_create CAS={cas}: force refresh")
ingredient = cls.ingredient_builder(cas, inci=inci) ingredient = cls.ingredient_builder(cas, inci=inci)
if cached:
if ingredient.dap_info is None and cached.dap_info is not None:
logger.warning(f"get_or_create CAS={cas}: dap_info non ottenuto, mantengo dati cached")
ingredient.dap_info = cached.dap_info
if ingredient.cosing_info is None and cached.cosing_info is not None:
logger.warning(f"get_or_create CAS={cas}: cosing_info non ottenuto, mantengo dati cached")
ingredient.cosing_info = cached.cosing_info
if ingredient.toxicity is None and cached.toxicity is not None:
logger.warning(f"get_or_create CAS={cas}: toxicity non ottenuta, mantengo dati cached")
ingredient.toxicity = cached.toxicity
elif ingredient.toxicity is not None and cached.toxicity is not None:
custom_indicators = [i for i in cached.toxicity.indicators if i.is_custom]
if custom_indicators:
logger.info(f"get_or_create CAS={cas}: preservo {len(custom_indicators)} indicatori custom nel re-scraping")
ingredient.toxicity = Toxicity(
cas=ingredient.toxicity.cas,
indicators=ingredient.toxicity.indicators + custom_indicators
)
ingredient.save() ingredient.save()
return ingredient return ingredient
@ -452,6 +480,7 @@ class Ingredient(BaseModel):
def add_tox_indicator(self, indicator: ToxIndicator): def add_tox_indicator(self, indicator: ToxIndicator):
"""Aggiunge un indicatore tossicologico custom e ricalcola il best_case.""" """Aggiunge un indicatore tossicologico custom e ricalcola il best_case."""
indicator.is_custom = True
if self.toxicity is None: if self.toxicity is None:
self.toxicity = Toxicity(cas=self.cas, indicators=[indicator]) self.toxicity = Toxicity(cas=self.cas, indicators=[indicator])
else: else:

View file

@ -120,7 +120,8 @@ async def generate_project_source_pdfs(project, output_dir: str = "pdfs") -> lis
# --- Tox best_case PDF --- # --- Tox best_case PDF ---
best = ing.toxicity.best_case if ing.toxicity else None best = ing.toxicity.best_case if ing.toxicity else None
if best and best.ref: if best and best.ref:
pdf_name = f"{pi.cas}_{best.source}" if best.source else pi.cas source_label = best.source or best.toxicity_type or "tox"
pdf_name = f"{pi.cas}_{source_label}"
log.info(f"Generazione PDF tox: {pdf_name} da {best.ref}") log.info(f"Generazione PDF tox: {pdf_name} da {best.ref}")
success = await generate_pdf(best.ref, pdf_name) success = await generate_pdf(best.ref, pdf_name)
if success: if success:
@ -143,7 +144,7 @@ async def generate_project_source_pdfs(project, output_dir: str = "pdfs") -> lis
generated.append(pdf_path) generated.append(pdf_path)
continue continue
log.info(f"Download COSING PDF: {pdf_name} (ref={cosing.reference})") log.info(f"Download COSING PDF: {pdf_name} (reference={cosing.reference})")
content = cosing_download(cosing.reference) content = cosing_download(cosing.reference)
if isinstance(content, bytes): if isinstance(content, bytes):
with open(pdf_path, 'wb') as f: with open(pdf_path, 'wb') as f:

View file

@ -9,7 +9,7 @@ from playwright.sync_api import sync_playwright
from typing import Callable, Any from typing import Callable, Any
from pif_compiler.functions.common_log import get_logger from pif_compiler.functions.common_log import get_logger
from pif_compiler.functions.db_utils import db_connect, log_ricerche from pif_compiler.functions.db_utils import log_ricerche
log = get_logger() log = get_logger()
load_dotenv() load_dotenv()
@ -30,12 +30,12 @@ legislation = "&legislation=REACH"
def search_substance(cas : str) -> dict: def search_substance(cas : str) -> dict:
response = requests.get(BASE_SEARCH + cas) response = requests.get(BASE_SEARCH + cas)
if response.status_code != 200: if response.status_code != 200:
log.error(f"Network error: {response.status_code}") log.error(f"search_substance CAS={cas}: HTTP {response.status_code}")
return {} return {}
else: else:
response = response.json() response = response.json()
if response['state']['totalItems'] == 0: if response['state']['totalItems'] == 0:
log.info(f"No substance found for CAS {cas}") log.warning(f"search_substance CAS={cas}: nessuna sostanza trovata su ECHA")
return {} return {}
else: else:
for result in response['items']: for result in response['items']:
@ -47,9 +47,9 @@ def search_substance(cas : str) -> dict:
"rmlName": result["substanceIndex"]["rmlName"], "rmlName": result["substanceIndex"]["rmlName"],
"rmlId": result["substanceIndex"]["rmlId"] "rmlId": result["substanceIndex"]["rmlId"]
} }
log.info(f"Substance found for CAS {cas}: {substance['rmlName']}") log.debug(f"search_substance CAS={cas}: trovata '{substance['rmlName']}'")
return substance return substance
log.error(f"Something went wrong searching the substance for CAS {cas}") log.warning(f"search_substance CAS={cas}: {response['state']['totalItems']} risultati ma nessun match esatto sul CAS")
return {} return {}
@ -57,14 +57,16 @@ def get_dossier_info(rmlId: str, type = active) -> dict:
url = BASE_DOSSIER + rmlId + type + legislation url = BASE_DOSSIER + rmlId + type + legislation
response_dossier = requests.get(url) response_dossier = requests.get(url)
if response_dossier.status_code != 200: if response_dossier.status_code != 200:
log.error(f"Network error: {response_dossier.status_code}") log.error(f"get_dossier_info rmlId={rmlId}: HTTP {response_dossier.status_code}")
return {} return {}
response_dossier_json = response_dossier.json() response_dossier_json = response_dossier.json()
if response_dossier_json['state']['totalItems'] == 0: if response_dossier_json['state']['totalItems'] == 0:
log.info(f"No dossier found for RML ID {rmlId}")
if type == active: if type == active:
log.debug(f"get_dossier_info rmlId={rmlId}: nessun dossier attivo, provo inattivi")
return get_dossier_info(rmlId, inactive) return get_dossier_info(rmlId, inactive)
log.warning(f"get_dossier_info rmlId={rmlId}: nessun dossier trovato (né attivo né inattivo)")
return {} return {}
dossier_info = {}
for dossier in response_dossier_json['items']: for dossier in response_dossier_json['items']:
if dossier['reachDossierInfo']['dossierSubtype'] == "Article 10 - full" and dossier['reachDossierInfo']['registrationRole'] == "Lead (joint submission)": if dossier['reachDossierInfo']['dossierSubtype'] == "Article 10 - full" and dossier['reachDossierInfo']['registrationRole'] == "Lead (joint submission)":
dossier_info = { dossier_info = {
@ -75,7 +77,8 @@ def get_dossier_info(rmlId: str, type = active) -> dict:
"assetExternalId": dossier['assetExternalId'], "assetExternalId": dossier['assetExternalId'],
"rootKey": dossier['rootKey'] "rootKey": dossier['rootKey']
} }
log.info(f"Dossier info retrieved for RML ID {rmlId}") if not dossier_info:
log.warning(f"get_dossier_info rmlId={rmlId}: nessun dossier 'Article 10 - full / Lead' tra i {response_dossier_json['state']['totalItems']} trovati")
return dossier_info return dossier_info
@ -85,7 +88,7 @@ def get_substance_index(assetExternalId : str) -> dict:
response = requests.get(INDEX + "/index.html") response = requests.get(INDEX + "/index.html")
if response.status_code != 200: if response.status_code != 200:
log.error(f"Network error: {response.status_code}") log.error(f"get_substance_index {assetExternalId}: HTTP {response.status_code}")
return {} return {}
soup = BeautifulSoup(response.content, 'html.parser') soup = BeautifulSoup(response.content, 'html.parser')
@ -98,7 +101,7 @@ def get_substance_index(assetExternalId : str) -> dict:
txi_href = txi_link['href'] txi_href = txi_link['href']
index_data['toxicological_information_link'] = LINK_DOSSIER + txi_href + '.html' index_data['toxicological_information_link'] = LINK_DOSSIER + txi_href + '.html'
except Exception as e: except Exception as e:
log.error(f"Error retrieving toxicological information link: {e}") log.warning(f"get_substance_index: link tossicologia non trovato — {e}")
index_data['toxicological_information_link'] = None index_data['toxicological_information_link'] = None
# Repeated dose toxicity : rdt # Repeated dose toxicity : rdt
@ -108,7 +111,7 @@ def get_substance_index(assetExternalId : str) -> dict:
rdt_href = rdt_link['href'] rdt_href = rdt_link['href']
index_data['repeated_dose_toxicity_link'] = LINK_DOSSIER + rdt_href + '.html' index_data['repeated_dose_toxicity_link'] = LINK_DOSSIER + rdt_href + '.html'
except Exception as e: except Exception as e:
log.error(f"Error retrieving repeated dose toxicity link: {e}") log.warning(f"get_substance_index: link repeated dose non trovato — {e}")
index_data['repeated_dose_toxicity_link'] = None index_data['repeated_dose_toxicity_link'] = None
# Acute toxicity : at # Acute toxicity : at
@ -118,11 +121,9 @@ def get_substance_index(assetExternalId : str) -> dict:
at_href = at_link['href'] at_href = at_link['href']
index_data['acute_toxicity_link'] = LINK_DOSSIER + at_href + '.html' index_data['acute_toxicity_link'] = LINK_DOSSIER + at_href + '.html'
except Exception as e: except Exception as e:
log.error(f"Error retrieving acute toxicity link: {e}") log.warning(f"get_substance_index: link acute toxicity non trovato — {e}")
index_data['acute_toxicity_link'] = None index_data['acute_toxicity_link'] = None
log.info(f"Substance index retrieved for Asset External ID {assetExternalId}")
return index_data return index_data
@ -429,8 +430,8 @@ def echa_flow(cas) -> dict:
substance = search_substance(cas) substance = search_substance(cas)
dossier_info = get_dossier_info(substance['rmlId']) dossier_info = get_dossier_info(substance['rmlId'])
index = get_substance_index(dossier_info['assetExternalId']) index = get_substance_index(dossier_info['assetExternalId'])
except Exception as e: except KeyError as e:
log.error(f"Error in ECHA flow for CAS {cas}: {e}") log.error(f"echa_flow CAS={cas}: chiave mancante nella risposta ECHA — {e}")
return {} return {}
result = { result = {
@ -442,14 +443,14 @@ def echa_flow(cas) -> dict:
"repeated_dose_toxicity": {} "repeated_dose_toxicity": {}
} }
log.debug(f"ECHA flow intermediate result")
# Fetch and parse toxicological information # Fetch and parse toxicological information
txi_link = index.get('toxicological_information_link') txi_link = index.get('toxicological_information_link')
if txi_link: if txi_link:
response_summary = requests.get(txi_link) response_summary = requests.get(txi_link)
if response_summary.status_code == 200: if response_summary.status_code == 200:
result['toxicological_information'] = parse_toxicology_html(response_summary.content) result['toxicological_information'] = parse_toxicology_html(response_summary.content)
else:
log.warning(f"echa_flow CAS={cas}: tossicologia HTTP {response_summary.status_code}")
# Fetch and parse acute toxicity # Fetch and parse acute toxicity
at_link = index.get('acute_toxicity_link') at_link = index.get('acute_toxicity_link')
@ -457,6 +458,8 @@ def echa_flow(cas) -> dict:
response_acute = requests.get(at_link) response_acute = requests.get(at_link)
if response_acute.status_code == 200: if response_acute.status_code == 200:
result['acute_toxicity'] = parse_toxicology_html(response_acute.content) result['acute_toxicity'] = parse_toxicology_html(response_acute.content)
else:
log.warning(f"echa_flow CAS={cas}: acute toxicity HTTP {response_acute.status_code}")
# Fetch and parse repeated dose toxicity # Fetch and parse repeated dose toxicity
rdt_link = index.get('repeated_dose_toxicity_link') rdt_link = index.get('repeated_dose_toxicity_link')
@ -464,86 +467,41 @@ def echa_flow(cas) -> dict:
response_repeated = requests.get(rdt_link) response_repeated = requests.get(rdt_link)
if response_repeated.status_code == 200: if response_repeated.status_code == 200:
result['repeated_dose_toxicity'] = parse_toxicology_html(response_repeated.content) result['repeated_dose_toxicity'] = parse_toxicology_html(response_repeated.content)
for key, value in result.items():
if value is None or value == "" or value == [] or value == {}:
log.warning(f"Missing data for key: {key} in CAS {cas}")
else: else:
log.info(f"Data retrieved for key: {key} in CAS {cas}") log.warning(f"echa_flow CAS={cas}: repeated dose HTTP {response_repeated.status_code}")
txi_ok = bool(result['toxicological_information'])
at_ok = bool(result['acute_toxicity'])
rdt_ok = bool(result['repeated_dose_toxicity'])
log.info(f"echa_flow CAS={cas}: txi={'OK' if txi_ok else '-'}, acute={'OK' if at_ok else '-'}, rdt={'OK' if rdt_ok else '-'}")
return result return result
def cas_validation(cas: str) -> str: def cas_validation(cas: str) -> str:
log.info(f"Starting ECHA data extraction for CAS: {cas}")
if cas is None or cas.strip() == "": if cas is None or cas.strip() == "":
log.error("No CAS number provided.") log.error("cas_validation: CAS vuoto o None")
return None return None
cas_stripped = cas.replace("-", "") cas_stripped = cas.replace("-", "")
if cas_stripped.isdigit() and len(cas_stripped) <= 12: if cas_stripped.isdigit() and len(cas_stripped) <= 12:
log.info(f"CAS number {cas} maybe is valid.")
return cas.strip() return cas.strip()
else: log.error(f"cas_validation: CAS '{cas}' non valido (formato non riconosciuto)")
log.error(f"CAS number {cas} is not valid.") return None
return None
def check_local(cas: str) -> bool:
collection = db_connect()
if collection is None:
log.error("No MongoDB collection available.")
return None
record = collection.find_one({"substance.rmlCas": cas})
if record:
log.info(f"Record for CAS {cas} found in local database.")
return record
else:
log.info(f"No record for CAS {cas} found in local database.")
return None
def add_to_local(data: dict) -> bool:
collection = db_connect()
if collection is None:
log.error("No MongoDB collection available.")
return False
try:
collection.insert_one(data)
log.info(f"Data for CAS {data['substance']['rmlCas']} added to local database.")
return True
except Exception as e:
log.error(f"Error inserting data into MongoDB: {e}")
return False
def orchestrator(cas: str) -> dict: def orchestrator(cas: str) -> dict:
log.debug(f"Initiating search for CAS {cas} in ECHA service.") log.debug(f"ECHA orchestrator CAS={cas}")
cas_validated = cas_validation(cas) cas_validated = cas_validation(cas)
if not cas_validated: if not cas_validated:
return None return None
else:
log.info(f"CAS {cas} validated successfully.")
local_record = check_local(cas_validated)
if local_record:
log.info(f"Returning local record for CAS {cas}.")
log_ricerche(cas, 'ECHA', True)
return local_record
else:
log.info(f"No local record, starting echa flow")
echa_data = echa_flow(cas_validated)
if echa_data:
log.info(f"Echa flow successful")
log_ricerche(cas, 'ECHA', True)
add_to_local(echa_data)
return echa_data
else:
log.error(f"Failed to retrieve ECHA data for CAS {cas}.")
log_ricerche(cas, 'ECHA', False)
return None
# to do: check if document is complete echa_data = echa_flow(cas_validated)
# to do: check lastupdate if echa_data:
log.info(f"ECHA CAS={cas}: completato")
log_ricerche(cas, 'ECHA', True)
return echa_data
else:
log.error(f"ECHA CAS={cas}: nessun dato recuperato")
log_ricerche(cas, 'ECHA', False)
return None
#endregion #endregion
if __name__ == "__main__": if __name__ == "__main__":