diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/src/pif_compiler/services/__init__.py b/src/pif_compiler/services/__init__.py index 810a684..fa24331 100644 --- a/src/pif_compiler/services/__init__.py +++ b/src/pif_compiler/services/__init__.py @@ -14,30 +14,6 @@ Modules: """ # ECHA Services -from pif_compiler.services.echa_find import ( - search_dossier, -) - -from pif_compiler.services.echa_process import ( - echaExtract, - echaExtract_multi, - echaExtract_specific, - echaExtract_local, - echa_noael_ld50, - echa_noael_ld50_multi, - echaPage_to_md, - openEchaPage, - markdown_to_json_raw, - clean_json, - json_to_dataframe, - filter_dataframe_by_dict, -) - -from pif_compiler.services.echa_pdf import ( - generate_pdf_with_header_and_cleanup, - search_generate_pdfs, - svg_to_data_uri, -) # COSING Service from pif_compiler.services.cosing_service import ( @@ -62,25 +38,6 @@ from pif_compiler.services.db_utils import get_client __all__ = [ - # ECHA Find - "search_dossier", - # ECHA Process - "echaExtract", - "echaExtract_multi", - "echaExtract_specific", - "echaExtract_local", - "echa_noael_ld50", - "echa_noael_ld50_multi", - "echaPage_to_md", - "openEchaPage", - "markdown_to_json_raw", - "clean_json", - "json_to_dataframe", - "filter_dataframe_by_dict", - # ECHA PDF - "generate_pdf_with_header_and_cleanup", - "search_generate_pdfs", - "svg_to_data_uri", # COSING Service "cosing_search", "clean_cosing", diff --git a/src/pif_compiler/services/db_utils.py b/src/pif_compiler/services/db_utils.py index 8774cca..de85302 100644 --- a/src/pif_compiler/services/db_utils.py +++ b/src/pif_compiler/services/db_utils.py @@ -1,4 +1,5 @@ import os +from urllib.parse import quote_plus from dotenv import load_dotenv from pymongo import MongoClient @@ -15,6 +16,9 @@ def get_client(): MONGO_HOST = os.getenv("MONGO_HOST") MONGO_PORT = os.getenv("MONGO_PORT") + MONGO_PORT = MONGO_PORT + ADMIN_PASSWORD = quote_plus(ADMIN_PASSWORD) + client = 
MongoClient( f"mongodb://{ADMIN_USER}:{ADMIN_PASSWORD}@{MONGO_HOST}:{MONGO_PORT}/?authSource=admin", serverSelectionTimeoutMS=5000 @@ -34,4 +38,11 @@ def db_connect(db_name : str = 'toxinfo', collection_name : str = 'substance_ind logger.error(f"Error connecting to MongoDB: {e}") return None - return client, db, collection + return collection + +if __name__ == "__main__": + coll = db_connect() + if coll is not None: + logger.info("Database connection successful.") + else: + logger.error("Database connection failed.") \ No newline at end of file diff --git a/src/pif_compiler/services/srv_echa.py b/src/pif_compiler/services/srv_echa.py index a8da61f..e008f6d 100644 --- a/src/pif_compiler/services/srv_echa.py +++ b/src/pif_compiler/services/srv_echa.py @@ -46,8 +46,9 @@ def search_substance(cas : str) -> dict: "rmlName": result["substanceIndex"]["rmlName"], "rmlId": result["substanceIndex"]["rmlId"] } + log.info(f"Substance found for CAS {cas}: {substance['rmlName']}") return substance - log.error(f"Something went wrong") + log.error(f"Something went wrong searching the substance for CAS {cas}") return {} @@ -69,6 +70,7 @@ def get_dossier_info(rmlId: str) -> dict: "assetExternalId": response_dossier_json['items'][0]['assetExternalId'], "rootKey": response_dossier_json['items'][0]['rootKey'] } + log.info(f"Dossier info retrieved for RML ID {rmlId}") return dossier_info @@ -105,6 +107,8 @@ def get_substance_index(assetExternalId : str) -> dict: at_href = at_link['href'] index_data['acute_toxicity_link'] = LINK_DOSSIER + at_href + '.html' + log.info(f"Substance index retrieved for Asset External ID {assetExternalId}") + return index_data #endregion @@ -303,6 +307,12 @@ def generate_pdf_from_toxicology_info(index: dict): page.goto(index['toxicological_information_link']) page.pdf(path=f'pdfs/{index["substance"]["rmlCas"]}.pdf') browser.close() + if os.path.exists(f'pdfs/{index["substance"]["rmlCas"]}.pdf'): + log.info(f"PDF generated for CAS {index['substance']['rmlCas']}") + return 
True + else: + log.error(f"PDF generation failed for CAS {index['substance']['rmlCas']}") + return False #endregion @@ -326,6 +336,8 @@ def echa_flow(cas) -> dict: "repeated_dose_toxicity": {} } + log.debug(f"ECHA flow intermediate result") + # Fetch and parse toxicological information txi_link = index.get('toxicological_information_link') if txi_link: @@ -349,7 +361,9 @@ def echa_flow(cas) -> dict: for key, value in result.items(): if value is None or value == "" or value == [] or value == {}: - return False + log.warning(f"Missing data for key: {key} in CAS {cas}") + else: + log.info(f"Data retrieved for key: {key} in CAS {cas}") return result def cas_validation(cas: str) -> str: @@ -367,9 +381,9 @@ def cas_validation(cas: str) -> str: return None def check_local(cas: str) -> bool: - client, db, collection = db_connect() + collection = db_connect() - if not collection: + if collection is None: log.error("No MongoDB collection available.") return None @@ -383,9 +397,9 @@ def check_local(cas: str) -> bool: return None def add_to_local(data: dict) -> bool: - client, db, collection = db_connect() + collection = db_connect() - if not collection: + if collection is None: log.error("No MongoDB collection available.") return False @@ -397,17 +411,22 @@ def add_to_local(data: dict) -> bool: log.error(f"Error inserting data into MongoDB: {e}") return False -def search_substance(cas: str) -> dict: +def orchestrator(cas: str) -> dict: + log.debug(f"Initiating search for CAS {cas} in ECHA service.") cas_validated = cas_validation(cas) if not cas_validated: return None else: + log.info(f"CAS {cas} validated successfully.") local_record = check_local(cas_validated) if local_record: + log.info(f"Returning local record for CAS {cas}.") return local_record else: + log.info(f"No local record, starting echa flow") echa_data = echa_flow(cas_validated) if echa_data: + log.info(f"Echa flow successful") add_to_local(echa_data) return echa_data else: @@ -416,4 +435,9 @@ def 
search_substance(cas: str) -> dict: # to do: check if document is complete # to do: check lastupdate -#endregion \ No newline at end of file +#endregion + +if __name__ == "__main__": + cas_test = "50-00-0" + result = orchestrator(cas_test) + print(result) \ No newline at end of file