Merge branch 'main' of github.com:adish-rmr/cosmoguard_backend
it is necessary
This commit is contained in:
commit
f99593482a
4 changed files with 75 additions and 333 deletions
|
|
@ -1,246 +0,0 @@
|
||||||
"""
|
|
||||||
Script per creare un ordine mock con 4 ingredienti per testare la UI.
|
|
||||||
Inserisce direttamente nei database senza passare dalla pipeline (no scraping).
|
|
||||||
|
|
||||||
Uso: uv run python scripts/create_mock_order.py
|
|
||||||
"""
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
|
|
||||||
# Aggiungi il path del progetto
|
|
||||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
|
|
||||||
|
|
||||||
from pif_compiler.functions.db_utils import (
|
|
||||||
db_connect, upsert_cliente, insert_ordine, aggiorna_stato_ordine,
|
|
||||||
update_ordine_cliente, upsert_ingrediente
|
|
||||||
)
|
|
||||||
from pif_compiler.classes.models import (
|
|
||||||
StatoOrdine, Ingredient, DapInfo, CosingInfo, ToxIndicator, Toxicity, Esposition
|
|
||||||
)
|
|
||||||
from pif_compiler.classes.main_workflow import Project, ProjectIngredient
|
|
||||||
|
|
||||||
|
|
||||||
def ensure_preset_exists(preset_name="Test Preset"):
    """Return the exposure preset named ``preset_name``, creating it when missing.

    The preset is looked up with ``Esposition.get_by_name``. If the lookup
    yields a truthy value it is returned unchanged. Otherwise a new
    ``Esposition`` with fixed test values is built and stored through
    ``save_to_postgres()``; a falsy result from that call is treated as a
    failure and terminates the script with exit status 1.

    Args:
        preset_name: Name of the preset to look up or create.

    Returns:
        The existing or newly created ``Esposition`` instance.
    """
    existing = Esposition.get_by_name(preset_name)
    if existing:
        print(f"Preset '{preset_name}' già esistente")
        return existing

    print(f"Creazione preset '{preset_name}'...")
    # Fixed test values used for the mock preset.
    preset_fields = {
        "preset_name": preset_name,
        "tipo_prodotto": "Crema corpo",
        "luogo_applicazione": "Corpo",
        "esp_normali": ["Dermal"],
        "esp_secondarie": ["Oral"],
        "esp_nano": [],
        "sup_esposta": 15670,
        "freq_applicazione": 1,
        "qta_giornaliera": 7.82,
        "ritenzione": 1.0,
    }
    created = Esposition(**preset_fields)

    saved_id = created.save_to_postgres()
    if not saved_id:
        # Nothing downstream can work without a stored preset: stop here.
        print("ERRORE: impossibile creare il preset")
        sys.exit(1)

    print(f"Preset creato con id_preset={saved_id}")
    return created
|
|
||||||
|
|
||||||
|
|
||||||
def create_mock_ingredients():
    """Build and save three mock ingredients with made-up toxicology and DAP data.

    The ingredients are GLYCERIN (NOAEL and LD50 indicators), CETYL ALCOHOL
    (NOAEL) and TOCOPHEROL (LOAEL). All three objects are constructed first;
    each is then stored by calling its ``save()`` method (per the original
    comment this writes to MongoDB and PostgreSQL) and a confirmation line is
    printed.

    Returns:
        tuple: ``(glycerin, cetyl, tocopherol)`` as ``Ingredient`` instances.
    """

    def _build(cas, inci_name, dap_values, common_names, functions, echa_ref, tox_rows):
        # Assemble one Ingredient; every nested model gets its own fresh lists.
        mol_weight, log_pow, tpsa, melting_point = dap_values
        indicators = [
            ToxIndicator(
                indicator=name,
                value=amount,
                unit=unit,
                route="oral",
                toxicity_type=tox_type,
                ref=echa_ref,
            )
            for name, amount, unit, tox_type in tox_rows
        ]
        return Ingredient(
            cas=cas,
            inci=[inci_name],
            dap_info=DapInfo(
                cas=cas,
                molecular_weight=mol_weight,
                log_pow=log_pow,
                tpsa=tpsa,
                melting_point=melting_point,
            ),
            cosing_info=[
                CosingInfo(
                    cas=[cas],
                    common_names=common_names,
                    inci=[inci_name],
                    annex=[],
                    functionName=functions,
                    otherRestrictions=[],
                    cosmeticRestriction=None,
                )
            ],
            toxicity=Toxicity(cas=cas, indicators=indicators),
        )

    # GLYCERIN (56-81-5): NOAEL plus an acute LD50
    glycerin = _build(
        "56-81-5",
        "GLYCERIN",
        (92.09, -1.76, 60.69, 18.0),
        ["Glycerol"],
        ["Humectant", "Solvent", "Skin conditioning"],
        "https://chem.echa.europa.eu/100.003.264",
        [
            ("NOAEL", 1000, "mg/kg bw/day", "repeated_dose_toxicity"),
            ("LD50", 12600, "mg/kg bw", "acute_toxicity"),
        ],
    )

    # CETYL ALCOHOL (36653-82-4): NOAEL only
    cetyl = _build(
        "36653-82-4",
        "CETYL ALCOHOL",
        (242.44, 6.83, 20.23, 49.0),
        ["Cetyl alcohol", "1-Hexadecanol"],
        ["Emollient", "Emulsifying", "Opacifying"],
        "https://chem.echa.europa.eu/100.004.098",
        [("NOAEL", 1000, "mg/kg bw/day", "repeated_dose_toxicity")],
    )

    # TOCOPHEROL (59-02-9): LOAEL only
    tocopherol = _build(
        "59-02-9",
        "TOCOPHEROL",
        (430.71, 10.51, 29.46, 3.0),
        ["alpha-Tocopherol"],
        ["Antioxidant", "Skin conditioning"],
        "https://chem.echa.europa.eu/100.000.375",
        [("LOAEL", 500, "mg/kg bw/day", "repeated_dose_toxicity")],
    )

    # Persist only after all three objects have been built, in a fixed order.
    built = (glycerin, cetyl, tocopherol)
    for item in built:
        stored_id = item.save()
        print(f"Ingrediente {item.cas} ({item.inci[0]}) salvato (mongo_id={stored_id})")

    return built
|
|
||||||
|
|
||||||
|
|
||||||
def create_mock_order(preset, glycerin, cetyl, tocopherol):
    """Create a complete mock order and its project, then print a summary.

    Steps, in order: upsert the client, insert the raw order document into
    the ``orders`` collection, register the order via ``insert_ordine``,
    link it to the client and set its state to ``StatoOrdine.ARRICCHITO``,
    then build and save a ``Project`` holding the four ingredients.

    Args:
        preset: Exposure preset attached to the project; its ``preset_name``
            is also written into the raw order document.
        glycerin: Enriched ingredient linked to the GLYCERIN row.
        cetyl: Enriched ingredient linked to the CETYL ALCOHOL row.
        tocopherol: Enriched ingredient linked to the TOCOPHEROL row.

    Returns:
        The order id returned by ``insert_ordine``.
    """
    client_name = "Cosmetica Test Srl"
    product_name = "Crema Idratante Test"

    # 1. Upsert the client
    id_cliente = upsert_cliente(client_name)
    print(f"Cliente '{client_name}' → id_cliente={id_cliente}")

    # 2. Raw order document; AQUA has no CAS and skips toxicology
    ingredient_rows = [
        ("AQUA", "", 70.0, True),
        ("GLYCERIN", "56-81-5", 15.0, False),
        ("CETYL ALCOHOL", "36653-82-4", 10.0, False),
        ("TOCOPHEROL", "59-02-9", 5.0, False),
    ]
    raw_json = {
        "client_name": client_name,
        "product_name": product_name,
        "preset_esposizione": preset.preset_name,
        "ingredients": [
            {
                "inci": inci,
                "cas": cas,
                "percentage": pct,
                "is_colorante": False,
                "skip_tox": skip,
            }
            for inci, cas, pct, skip in ingredient_rows
        ],
    }

    # 3. Store a shallow copy of the raw order in the MongoDB 'orders' collection
    orders = db_connect(collection_name='orders')
    inserted = orders.insert_one(dict(raw_json))
    uuid_ordine = str(inserted.inserted_id)
    print(f"Ordine salvato su MongoDB: uuid_ordine={uuid_ordine}")

    # 4. Register the order in PostgreSQL
    id_ordine = insert_ordine(uuid_ordine, id_cliente)
    print(f"Ordine inserito in PostgreSQL: id_ordine={id_ordine}")

    # 5. Link the client and move the order to the ARRICCHITO state
    update_ordine_cliente(id_ordine, id_cliente)
    aggiorna_stato_ordine(id_ordine, int(StatoOrdine.ARRICCHITO))
    print(f"Stato ordine aggiornato a ARRICCHITO ({StatoOrdine.ARRICCHITO})")

    # 6. Build the project with the enriched ingredients
    project_ingredients = [
        ProjectIngredient(cas=None, inci="AQUA", percentage=70.0, skip_tox=True),
        ProjectIngredient(cas="56-81-5", inci="GLYCERIN", percentage=15.0, ingredient=glycerin),
        ProjectIngredient(cas="36653-82-4", inci="CETYL ALCOHOL", percentage=10.0, ingredient=cetyl),
        ProjectIngredient(cas="59-02-9", inci="TOCOPHEROL", percentage=5.0, ingredient=tocopherol),
    ]
    project = Project(
        order_id=id_ordine,
        product_name=product_name,
        client_name=client_name,
        esposition=preset,
        ingredients=project_ingredients,
    )

    # 7. Save the project (MongoDB + PostgreSQL per the original comment)
    uuid_progetto = project.save()
    print(f"Progetto salvato: uuid_progetto={uuid_progetto}")

    # Final recap, one printed line per entry
    rule = "=" * 60
    summary = [
        "\n" + rule,
        "MOCK ORDER CREATO CON SUCCESSO",
        rule,
        f" id_ordine: {id_ordine}",
        f" uuid_ordine: {uuid_ordine}",
        f" uuid_progetto: {uuid_progetto}",
        f" cliente: {client_name}",
        " prodotto: Crema Idratante Test",
        f" preset: {preset.preset_name}",
        " ingredienti: 4 (AQUA, GLYCERIN, CETYL ALCOHOL, TOCOPHEROL)",
        f" stato: ARRICCHITO ({StatoOrdine.ARRICCHITO})",
        rule,
    ]
    for line in summary:
        print(line)

    return id_ordine
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Script entry point: preset first, then ingredients, then the order itself.
    print("Creazione ordine mock...")
    print()

    # Step 1: make sure the exposure preset is available
    exposure_preset = ensure_preset_exists()

    # Step 2: build and store the mock ingredients
    mock_ingredients = create_mock_ingredients()

    # Step 3: create the order from the preset and the three ingredients
    create_mock_order(exposure_preset, *mock_ingredients)
|
|
||||||
|
|
@ -127,6 +127,7 @@ class CosingInfo(BaseModel):
|
||||||
otherRestrictions : List[str] = Field(default_factory=list)
|
otherRestrictions : List[str] = Field(default_factory=list)
|
||||||
cosmeticRestriction : Optional[str] = None
|
cosmeticRestriction : Optional[str] = None
|
||||||
reference : Optional[str] = None
|
reference : Optional[str] = None
|
||||||
|
substanceId : Optional[str] = None
|
||||||
sccsOpinionUrls : List[str] = Field(default_factory=list)
|
sccsOpinionUrls : List[str] = Field(default_factory=list)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
@ -140,6 +141,7 @@ class CosingInfo(BaseModel):
|
||||||
'otherRestrictions',
|
'otherRestrictions',
|
||||||
'cosmeticRestriction',
|
'cosmeticRestriction',
|
||||||
'reference',
|
'reference',
|
||||||
|
'substanceId',
|
||||||
'inciName',
|
'inciName',
|
||||||
'sccsOpinionUrls'
|
'sccsOpinionUrls'
|
||||||
]
|
]
|
||||||
|
|
@ -185,6 +187,8 @@ class CosingInfo(BaseModel):
|
||||||
cosing_dict['cosmeticRestriction'] = cosing_data[k]
|
cosing_dict['cosmeticRestriction'] = cosing_data[k]
|
||||||
if k == 'reference':
|
if k == 'reference':
|
||||||
cosing_dict['reference'] = cosing_data[k]
|
cosing_dict['reference'] = cosing_data[k]
|
||||||
|
if k == 'substanceId':
|
||||||
|
cosing_dict['substanceId'] = cosing_data[k]
|
||||||
if k == 'sccsOpinionUrls':
|
if k == 'sccsOpinionUrls':
|
||||||
urls = []
|
urls = []
|
||||||
for url in cosing_data[k]:
|
for url in cosing_data[k]:
|
||||||
|
|
@ -213,6 +217,7 @@ class ToxIndicator(BaseModel):
|
||||||
toxicity_type : Optional[str] = None
|
toxicity_type : Optional[str] = None
|
||||||
ref : Optional[str] = None
|
ref : Optional[str] = None
|
||||||
source : Optional[str] = None
|
source : Optional[str] = None
|
||||||
|
is_custom : bool = False
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def priority_rank(self):
|
def priority_rank(self):
|
||||||
|
|
@ -392,7 +397,10 @@ class Ingredient(BaseModel):
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_or_create(cls, cas: str, inci: Optional[List[str]] = None, force: bool = False):
|
def get_or_create(cls, cas: str, inci: Optional[List[str]] = None, force: bool = False):
|
||||||
"""Restituisce l'ingrediente dalla cache se esiste e non è vecchio, altrimenti lo ricrea.
|
"""Restituisce l'ingrediente dalla cache se esiste e non è vecchio, altrimenti lo ricrea.
|
||||||
Se force=True, ignora la cache e riesegue lo scraping aggiornando il documento."""
|
Se force=True, ignora la cache e riesegue lo scraping aggiornando il documento.
|
||||||
|
Al re-scraping, i campi che risultano None vengono sostituiti con il valore cached
|
||||||
|
per evitare regressioni di dati in caso di fallimenti temporanei delle fonti esterne."""
|
||||||
|
cached = None
|
||||||
if not force:
|
if not force:
|
||||||
cached = cls.from_cas(cas)
|
cached = cls.from_cas(cas)
|
||||||
if cached and not cached.is_old():
|
if cached and not cached.is_old():
|
||||||
|
|
@ -405,6 +413,26 @@ class Ingredient(BaseModel):
|
||||||
logger.info(f"get_or_create CAS={cas}: force refresh")
|
logger.info(f"get_or_create CAS={cas}: force refresh")
|
||||||
|
|
||||||
ingredient = cls.ingredient_builder(cas, inci=inci)
|
ingredient = cls.ingredient_builder(cas, inci=inci)
|
||||||
|
|
||||||
|
if cached:
|
||||||
|
if ingredient.dap_info is None and cached.dap_info is not None:
|
||||||
|
logger.warning(f"get_or_create CAS={cas}: dap_info non ottenuto, mantengo dati cached")
|
||||||
|
ingredient.dap_info = cached.dap_info
|
||||||
|
if ingredient.cosing_info is None and cached.cosing_info is not None:
|
||||||
|
logger.warning(f"get_or_create CAS={cas}: cosing_info non ottenuto, mantengo dati cached")
|
||||||
|
ingredient.cosing_info = cached.cosing_info
|
||||||
|
if ingredient.toxicity is None and cached.toxicity is not None:
|
||||||
|
logger.warning(f"get_or_create CAS={cas}: toxicity non ottenuta, mantengo dati cached")
|
||||||
|
ingredient.toxicity = cached.toxicity
|
||||||
|
elif ingredient.toxicity is not None and cached.toxicity is not None:
|
||||||
|
custom_indicators = [i for i in cached.toxicity.indicators if i.is_custom]
|
||||||
|
if custom_indicators:
|
||||||
|
logger.info(f"get_or_create CAS={cas}: preservo {len(custom_indicators)} indicatori custom nel re-scraping")
|
||||||
|
ingredient.toxicity = Toxicity(
|
||||||
|
cas=ingredient.toxicity.cas,
|
||||||
|
indicators=ingredient.toxicity.indicators + custom_indicators
|
||||||
|
)
|
||||||
|
|
||||||
ingredient.save()
|
ingredient.save()
|
||||||
return ingredient
|
return ingredient
|
||||||
|
|
||||||
|
|
@ -452,6 +480,7 @@ class Ingredient(BaseModel):
|
||||||
|
|
||||||
def add_tox_indicator(self, indicator: ToxIndicator):
|
def add_tox_indicator(self, indicator: ToxIndicator):
|
||||||
"""Aggiunge un indicatore tossicologico custom e ricalcola il best_case."""
|
"""Aggiunge un indicatore tossicologico custom e ricalcola il best_case."""
|
||||||
|
indicator.is_custom = True
|
||||||
if self.toxicity is None:
|
if self.toxicity is None:
|
||||||
self.toxicity = Toxicity(cas=self.cas, indicators=[indicator])
|
self.toxicity = Toxicity(cas=self.cas, indicators=[indicator])
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
|
|
@ -120,7 +120,8 @@ async def generate_project_source_pdfs(project, output_dir: str = "pdfs") -> lis
|
||||||
# --- Tox best_case PDF ---
|
# --- Tox best_case PDF ---
|
||||||
best = ing.toxicity.best_case if ing.toxicity else None
|
best = ing.toxicity.best_case if ing.toxicity else None
|
||||||
if best and best.ref:
|
if best and best.ref:
|
||||||
pdf_name = f"{pi.cas}_{best.source}" if best.source else pi.cas
|
source_label = best.source or best.toxicity_type or "tox"
|
||||||
|
pdf_name = f"{pi.cas}_{source_label}"
|
||||||
log.info(f"Generazione PDF tox: {pdf_name} da {best.ref}")
|
log.info(f"Generazione PDF tox: {pdf_name} da {best.ref}")
|
||||||
success = await generate_pdf(best.ref, pdf_name)
|
success = await generate_pdf(best.ref, pdf_name)
|
||||||
if success:
|
if success:
|
||||||
|
|
@ -143,7 +144,7 @@ async def generate_project_source_pdfs(project, output_dir: str = "pdfs") -> lis
|
||||||
generated.append(pdf_path)
|
generated.append(pdf_path)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
log.info(f"Download COSING PDF: {pdf_name} (ref={cosing.reference})")
|
log.info(f"Download COSING PDF: {pdf_name} (reference={cosing.reference})")
|
||||||
content = cosing_download(cosing.reference)
|
content = cosing_download(cosing.reference)
|
||||||
if isinstance(content, bytes):
|
if isinstance(content, bytes):
|
||||||
with open(pdf_path, 'wb') as f:
|
with open(pdf_path, 'wb') as f:
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ from playwright.sync_api import sync_playwright
|
||||||
from typing import Callable, Any
|
from typing import Callable, Any
|
||||||
|
|
||||||
from pif_compiler.functions.common_log import get_logger
|
from pif_compiler.functions.common_log import get_logger
|
||||||
from pif_compiler.functions.db_utils import db_connect, log_ricerche
|
from pif_compiler.functions.db_utils import log_ricerche
|
||||||
|
|
||||||
log = get_logger()
|
log = get_logger()
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
@ -30,12 +30,12 @@ legislation = "&legislation=REACH"
|
||||||
def search_substance(cas : str) -> dict:
|
def search_substance(cas : str) -> dict:
|
||||||
response = requests.get(BASE_SEARCH + cas)
|
response = requests.get(BASE_SEARCH + cas)
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
log.error(f"Network error: {response.status_code}")
|
log.error(f"search_substance CAS={cas}: HTTP {response.status_code}")
|
||||||
return {}
|
return {}
|
||||||
else:
|
else:
|
||||||
response = response.json()
|
response = response.json()
|
||||||
if response['state']['totalItems'] == 0:
|
if response['state']['totalItems'] == 0:
|
||||||
log.info(f"No substance found for CAS {cas}")
|
log.warning(f"search_substance CAS={cas}: nessuna sostanza trovata su ECHA")
|
||||||
return {}
|
return {}
|
||||||
else:
|
else:
|
||||||
for result in response['items']:
|
for result in response['items']:
|
||||||
|
|
@ -47,9 +47,9 @@ def search_substance(cas : str) -> dict:
|
||||||
"rmlName": result["substanceIndex"]["rmlName"],
|
"rmlName": result["substanceIndex"]["rmlName"],
|
||||||
"rmlId": result["substanceIndex"]["rmlId"]
|
"rmlId": result["substanceIndex"]["rmlId"]
|
||||||
}
|
}
|
||||||
log.info(f"Substance found for CAS {cas}: {substance['rmlName']}")
|
log.debug(f"search_substance CAS={cas}: trovata '{substance['rmlName']}'")
|
||||||
return substance
|
return substance
|
||||||
log.error(f"Something went wrong searching the substance for CAS {cas}")
|
log.warning(f"search_substance CAS={cas}: {response['state']['totalItems']} risultati ma nessun match esatto sul CAS")
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -57,14 +57,16 @@ def get_dossier_info(rmlId: str, type = active) -> dict:
|
||||||
url = BASE_DOSSIER + rmlId + type + legislation
|
url = BASE_DOSSIER + rmlId + type + legislation
|
||||||
response_dossier = requests.get(url)
|
response_dossier = requests.get(url)
|
||||||
if response_dossier.status_code != 200:
|
if response_dossier.status_code != 200:
|
||||||
log.error(f"Network error: {response_dossier.status_code}")
|
log.error(f"get_dossier_info rmlId={rmlId}: HTTP {response_dossier.status_code}")
|
||||||
return {}
|
return {}
|
||||||
response_dossier_json = response_dossier.json()
|
response_dossier_json = response_dossier.json()
|
||||||
if response_dossier_json['state']['totalItems'] == 0:
|
if response_dossier_json['state']['totalItems'] == 0:
|
||||||
log.info(f"No dossier found for RML ID {rmlId}")
|
|
||||||
if type == active:
|
if type == active:
|
||||||
|
log.debug(f"get_dossier_info rmlId={rmlId}: nessun dossier attivo, provo inattivi")
|
||||||
return get_dossier_info(rmlId, inactive)
|
return get_dossier_info(rmlId, inactive)
|
||||||
|
log.warning(f"get_dossier_info rmlId={rmlId}: nessun dossier trovato (né attivo né inattivo)")
|
||||||
return {}
|
return {}
|
||||||
|
dossier_info = {}
|
||||||
for dossier in response_dossier_json['items']:
|
for dossier in response_dossier_json['items']:
|
||||||
if dossier['reachDossierInfo']['dossierSubtype'] == "Article 10 - full" and dossier['reachDossierInfo']['registrationRole'] == "Lead (joint submission)":
|
if dossier['reachDossierInfo']['dossierSubtype'] == "Article 10 - full" and dossier['reachDossierInfo']['registrationRole'] == "Lead (joint submission)":
|
||||||
dossier_info = {
|
dossier_info = {
|
||||||
|
|
@ -75,7 +77,8 @@ def get_dossier_info(rmlId: str, type = active) -> dict:
|
||||||
"assetExternalId": dossier['assetExternalId'],
|
"assetExternalId": dossier['assetExternalId'],
|
||||||
"rootKey": dossier['rootKey']
|
"rootKey": dossier['rootKey']
|
||||||
}
|
}
|
||||||
log.info(f"Dossier info retrieved for RML ID {rmlId}")
|
if not dossier_info:
|
||||||
|
log.warning(f"get_dossier_info rmlId={rmlId}: nessun dossier 'Article 10 - full / Lead' tra i {response_dossier_json['state']['totalItems']} trovati")
|
||||||
return dossier_info
|
return dossier_info
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -85,7 +88,7 @@ def get_substance_index(assetExternalId : str) -> dict:
|
||||||
|
|
||||||
response = requests.get(INDEX + "/index.html")
|
response = requests.get(INDEX + "/index.html")
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
log.error(f"Network error: {response.status_code}")
|
log.error(f"get_substance_index {assetExternalId}: HTTP {response.status_code}")
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
soup = BeautifulSoup(response.content, 'html.parser')
|
soup = BeautifulSoup(response.content, 'html.parser')
|
||||||
|
|
@ -98,7 +101,7 @@ def get_substance_index(assetExternalId : str) -> dict:
|
||||||
txi_href = txi_link['href']
|
txi_href = txi_link['href']
|
||||||
index_data['toxicological_information_link'] = LINK_DOSSIER + txi_href + '.html'
|
index_data['toxicological_information_link'] = LINK_DOSSIER + txi_href + '.html'
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.error(f"Error retrieving toxicological information link: {e}")
|
log.warning(f"get_substance_index: link tossicologia non trovato — {e}")
|
||||||
index_data['toxicological_information_link'] = None
|
index_data['toxicological_information_link'] = None
|
||||||
|
|
||||||
# Repeated dose toxicity : rdt
|
# Repeated dose toxicity : rdt
|
||||||
|
|
@ -108,7 +111,7 @@ def get_substance_index(assetExternalId : str) -> dict:
|
||||||
rdt_href = rdt_link['href']
|
rdt_href = rdt_link['href']
|
||||||
index_data['repeated_dose_toxicity_link'] = LINK_DOSSIER + rdt_href + '.html'
|
index_data['repeated_dose_toxicity_link'] = LINK_DOSSIER + rdt_href + '.html'
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.error(f"Error retrieving repeated dose toxicity link: {e}")
|
log.warning(f"get_substance_index: link repeated dose non trovato — {e}")
|
||||||
index_data['repeated_dose_toxicity_link'] = None
|
index_data['repeated_dose_toxicity_link'] = None
|
||||||
|
|
||||||
# Acute toxicity : at
|
# Acute toxicity : at
|
||||||
|
|
@ -118,11 +121,9 @@ def get_substance_index(assetExternalId : str) -> dict:
|
||||||
at_href = at_link['href']
|
at_href = at_link['href']
|
||||||
index_data['acute_toxicity_link'] = LINK_DOSSIER + at_href + '.html'
|
index_data['acute_toxicity_link'] = LINK_DOSSIER + at_href + '.html'
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.error(f"Error retrieving acute toxicity link: {e}")
|
log.warning(f"get_substance_index: link acute toxicity non trovato — {e}")
|
||||||
index_data['acute_toxicity_link'] = None
|
index_data['acute_toxicity_link'] = None
|
||||||
|
|
||||||
log.info(f"Substance index retrieved for Asset External ID {assetExternalId}")
|
|
||||||
|
|
||||||
return index_data
|
return index_data
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -429,8 +430,8 @@ def echa_flow(cas) -> dict:
|
||||||
substance = search_substance(cas)
|
substance = search_substance(cas)
|
||||||
dossier_info = get_dossier_info(substance['rmlId'])
|
dossier_info = get_dossier_info(substance['rmlId'])
|
||||||
index = get_substance_index(dossier_info['assetExternalId'])
|
index = get_substance_index(dossier_info['assetExternalId'])
|
||||||
except Exception as e:
|
except KeyError as e:
|
||||||
log.error(f"Error in ECHA flow for CAS {cas}: {e}")
|
log.error(f"echa_flow CAS={cas}: chiave mancante nella risposta ECHA — {e}")
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
result = {
|
result = {
|
||||||
|
|
@ -442,14 +443,14 @@ def echa_flow(cas) -> dict:
|
||||||
"repeated_dose_toxicity": {}
|
"repeated_dose_toxicity": {}
|
||||||
}
|
}
|
||||||
|
|
||||||
log.debug(f"ECHA flow intermediate result")
|
|
||||||
|
|
||||||
# Fetch and parse toxicological information
|
# Fetch and parse toxicological information
|
||||||
txi_link = index.get('toxicological_information_link')
|
txi_link = index.get('toxicological_information_link')
|
||||||
if txi_link:
|
if txi_link:
|
||||||
response_summary = requests.get(txi_link)
|
response_summary = requests.get(txi_link)
|
||||||
if response_summary.status_code == 200:
|
if response_summary.status_code == 200:
|
||||||
result['toxicological_information'] = parse_toxicology_html(response_summary.content)
|
result['toxicological_information'] = parse_toxicology_html(response_summary.content)
|
||||||
|
else:
|
||||||
|
log.warning(f"echa_flow CAS={cas}: tossicologia HTTP {response_summary.status_code}")
|
||||||
|
|
||||||
# Fetch and parse acute toxicity
|
# Fetch and parse acute toxicity
|
||||||
at_link = index.get('acute_toxicity_link')
|
at_link = index.get('acute_toxicity_link')
|
||||||
|
|
@ -457,6 +458,8 @@ def echa_flow(cas) -> dict:
|
||||||
response_acute = requests.get(at_link)
|
response_acute = requests.get(at_link)
|
||||||
if response_acute.status_code == 200:
|
if response_acute.status_code == 200:
|
||||||
result['acute_toxicity'] = parse_toxicology_html(response_acute.content)
|
result['acute_toxicity'] = parse_toxicology_html(response_acute.content)
|
||||||
|
else:
|
||||||
|
log.warning(f"echa_flow CAS={cas}: acute toxicity HTTP {response_acute.status_code}")
|
||||||
|
|
||||||
# Fetch and parse repeated dose toxicity
|
# Fetch and parse repeated dose toxicity
|
||||||
rdt_link = index.get('repeated_dose_toxicity_link')
|
rdt_link = index.get('repeated_dose_toxicity_link')
|
||||||
|
|
@ -464,86 +467,41 @@ def echa_flow(cas) -> dict:
|
||||||
response_repeated = requests.get(rdt_link)
|
response_repeated = requests.get(rdt_link)
|
||||||
if response_repeated.status_code == 200:
|
if response_repeated.status_code == 200:
|
||||||
result['repeated_dose_toxicity'] = parse_toxicology_html(response_repeated.content)
|
result['repeated_dose_toxicity'] = parse_toxicology_html(response_repeated.content)
|
||||||
|
|
||||||
for key, value in result.items():
|
|
||||||
if value is None or value == "" or value == [] or value == {}:
|
|
||||||
log.warning(f"Missing data for key: {key} in CAS {cas}")
|
|
||||||
else:
|
else:
|
||||||
log.info(f"Data retrieved for key: {key} in CAS {cas}")
|
log.warning(f"echa_flow CAS={cas}: repeated dose HTTP {response_repeated.status_code}")
|
||||||
|
|
||||||
|
txi_ok = bool(result['toxicological_information'])
|
||||||
|
at_ok = bool(result['acute_toxicity'])
|
||||||
|
rdt_ok = bool(result['repeated_dose_toxicity'])
|
||||||
|
log.info(f"echa_flow CAS={cas}: txi={'OK' if txi_ok else '-'}, acute={'OK' if at_ok else '-'}, rdt={'OK' if rdt_ok else '-'}")
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def cas_validation(cas: str) -> str:
|
def cas_validation(cas: str) -> str:
|
||||||
log.info(f"Starting ECHA data extraction for CAS: {cas}")
|
|
||||||
if cas is None or cas.strip() == "":
|
if cas is None or cas.strip() == "":
|
||||||
log.error("No CAS number provided.")
|
log.error("cas_validation: CAS vuoto o None")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
cas_stripped = cas.replace("-", "")
|
cas_stripped = cas.replace("-", "")
|
||||||
if cas_stripped.isdigit() and len(cas_stripped) <= 12:
|
if cas_stripped.isdigit() and len(cas_stripped) <= 12:
|
||||||
log.info(f"CAS number {cas} maybe is valid.")
|
|
||||||
return cas.strip()
|
return cas.strip()
|
||||||
else:
|
log.error(f"cas_validation: CAS '{cas}' non valido (formato non riconosciuto)")
|
||||||
log.error(f"CAS number {cas} is not valid.")
|
return None
|
||||||
return None
|
|
||||||
|
|
||||||
def check_local(cas: str) -> bool:
|
|
||||||
collection = db_connect()
|
|
||||||
|
|
||||||
if collection is None:
|
|
||||||
log.error("No MongoDB collection available.")
|
|
||||||
return None
|
|
||||||
|
|
||||||
record = collection.find_one({"substance.rmlCas": cas})
|
|
||||||
|
|
||||||
if record:
|
|
||||||
log.info(f"Record for CAS {cas} found in local database.")
|
|
||||||
return record
|
|
||||||
else:
|
|
||||||
log.info(f"No record for CAS {cas} found in local database.")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def add_to_local(data: dict) -> bool:
|
|
||||||
collection = db_connect()
|
|
||||||
|
|
||||||
if collection is None:
|
|
||||||
log.error("No MongoDB collection available.")
|
|
||||||
return False
|
|
||||||
|
|
||||||
try:
|
|
||||||
collection.insert_one(data)
|
|
||||||
log.info(f"Data for CAS {data['substance']['rmlCas']} added to local database.")
|
|
||||||
return True
|
|
||||||
except Exception as e:
|
|
||||||
log.error(f"Error inserting data into MongoDB: {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
def orchestrator(cas: str) -> dict:
|
def orchestrator(cas: str) -> dict:
|
||||||
log.debug(f"Initiating search for CAS {cas} in ECHA service.")
|
log.debug(f"ECHA orchestrator CAS={cas}")
|
||||||
cas_validated = cas_validation(cas)
|
cas_validated = cas_validation(cas)
|
||||||
if not cas_validated:
|
if not cas_validated:
|
||||||
return None
|
return None
|
||||||
else:
|
|
||||||
log.info(f"CAS {cas} validated successfully.")
|
|
||||||
local_record = check_local(cas_validated)
|
|
||||||
if local_record:
|
|
||||||
log.info(f"Returning local record for CAS {cas}.")
|
|
||||||
log_ricerche(cas, 'ECHA', True)
|
|
||||||
return local_record
|
|
||||||
else:
|
|
||||||
log.info(f"No local record, starting echa flow")
|
|
||||||
echa_data = echa_flow(cas_validated)
|
|
||||||
if echa_data:
|
|
||||||
log.info(f"Echa flow successful")
|
|
||||||
log_ricerche(cas, 'ECHA', True)
|
|
||||||
add_to_local(echa_data)
|
|
||||||
return echa_data
|
|
||||||
else:
|
|
||||||
log.error(f"Failed to retrieve ECHA data for CAS {cas}.")
|
|
||||||
log_ricerche(cas, 'ECHA', False)
|
|
||||||
return None
|
|
||||||
|
|
||||||
# to do: check if document is complete
|
echa_data = echa_flow(cas_validated)
|
||||||
# to do: check lastupdate
|
if echa_data:
|
||||||
|
log.info(f"ECHA CAS={cas}: completato")
|
||||||
|
log_ricerche(cas, 'ECHA', True)
|
||||||
|
return echa_data
|
||||||
|
else:
|
||||||
|
log.error(f"ECHA CAS={cas}: nessun dato recuperato")
|
||||||
|
log_ricerche(cas, 'ECHA', False)
|
||||||
|
return None
|
||||||
#endregion
|
#endregion
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue