From 133003344d2f3ca03123d2a0391f48436166154d Mon Sep 17 00:00:00 2001
From: adish-rmr
Date: Fri, 2 Jan 2026 19:17:25 +0100
Subject: [PATCH] fixed dossier full & lead, ricerca non va in errore se almeno uno dei tre dossier esiste

---
 src/pif_compiler/services/srv_echa.py | 63 +++++++++++++++++----------
 1 file changed, 39 insertions(+), 24 deletions(-)

diff --git a/src/pif_compiler/services/srv_echa.py b/src/pif_compiler/services/srv_echa.py
index 07b2957..57d2b85 100644
--- a/src/pif_compiler/services/srv_echa.py
+++ b/src/pif_compiler/services/srv_echa.py
@@ -64,14 +64,18 @@ def get_dossier_info(rmlId: str, type = active) -> dict:
         if type == active:
             return get_dossier_info(rmlId, inactive)
         return {}
-    dossier_info = {
-        "lastUpdatedDate": response_dossier_json['items'][0]['lastUpdatedDate'],
-        "registrationStatus": response_dossier_json['items'][0]['registrationStatus'],
-        "registrationStatusChangedDate": response_dossier_json['items'][0]['registrationStatusChangedDate'],
-        "registrationRole": response_dossier_json['items'][0]['reachDossierInfo']['registrationRole'],
-        "assetExternalId": response_dossier_json['items'][0]['assetExternalId'],
-        "rootKey": response_dossier_json['items'][0]['rootKey']
-    }
+    # Default to {} so the final return cannot raise NameError when no
+    # dossier matches the full/lead filter below (mirrors the {} returns above).
+    dossier_info = {}
+    for dossier in response_dossier_json['items']:
+        if dossier['reachDossierInfo']['dossierSubtype'] == "Article 10 - full" and dossier['reachDossierInfo']['registrationRole'] == "Lead (joint submission)":
+            dossier_info = {
+                "lastUpdatedDate": dossier['lastUpdatedDate'],
+                "registrationStatus": dossier['registrationStatus'],
+                "registrationStatusChangedDate": dossier['registrationStatusChangedDate'],
+                "registrationRole": dossier['reachDossierInfo']['registrationRole'],
+                "assetExternalId": dossier['assetExternalId'],
+                "rootKey": dossier['rootKey']
+            }
 
     log.info(f"Dossier info retrieved for RML ID {rmlId}")
     return dossier_info
@@ -89,30 +93,41 @@ def get_substance_index(assetExternalId : str) -> dict:
     index_data = {}
 
     # Toxicological information : txi
-
-    txi_div = soup.find('div', id='id_7_Toxicologicalinformation')
-    txi_link = txi_div.find('a', class_='das-leaf')
-    txi_href = txi_link['href']
-    index_data['toxicological_information_link'] = LINK_DOSSIER + txi_href + '.html'
-
+    try:
+        txi_div = soup.find('div', id='id_7_Toxicologicalinformation')
+        txi_link = txi_div.find('a', class_='das-leaf')
+        txi_href = txi_link['href']
+        index_data['toxicological_information_link'] = LINK_DOSSIER + txi_href + '.html'
+    except Exception as e:
+        log.error(f"Error retrieving toxicological information link: {e}")
+        index_data['toxicological_information_link'] = None
+
     # Repeated dose toxicity : rdt
-
-    rdt_div = soup.find('div', id='id_75_Repeateddosetoxicity')
-    rdt_link = rdt_div.find('a', class_='das-leaf')
-    rdt_href = rdt_link['href']
-    index_data['repeated_dose_toxicity_link'] = LINK_DOSSIER + rdt_href + '.html'
+    try:
+        rdt_div = soup.find('div', id='id_75_Repeateddosetoxicity')
+        rdt_link = rdt_div.find('a', class_='das-leaf')
+        rdt_href = rdt_link['href']
+        index_data['repeated_dose_toxicity_link'] = LINK_DOSSIER + rdt_href + '.html'
+    except Exception as e:
+        log.error(f"Error retrieving repeated dose toxicity link: {e}")
+        index_data['repeated_dose_toxicity_link'] = None
 
     # Acute toxicity : at
-
-    at_div = soup.find('div', id='id_72_AcuteToxicity')
-    at_link = at_div.find('a', class_='das-leaf')
-    at_href = at_link['href']
-    index_data['acute_toxicity_link'] = LINK_DOSSIER + at_href + '.html'
+    try:
+        at_div = soup.find('div', id='id_72_AcuteToxicity')
+        at_link = at_div.find('a', class_='das-leaf')
+        at_href = at_link['href']
+        index_data['acute_toxicity_link'] = LINK_DOSSIER + at_href + '.html'
+    except Exception as e:
+        log.error(f"Error retrieving acute toxicity link: {e}")
+        index_data['acute_toxicity_link'] = None
 
     log.info(f"Substance index retrieved for Asset External ID {assetExternalId}")
     return index_data
+
+
 
 #endregion
 
 #region ECHA parsing functions of html pages