update
This commit is contained in:
parent
fcc8123966
commit
4cabf6fa11
4 changed files with 44 additions and 52 deletions
0
README.md
Normal file
0
README.md
Normal file
|
|
@ -14,30 +14,6 @@ Modules:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# ECHA Services
|
# ECHA Services
|
||||||
from pif_compiler.services.echa_find import (
|
|
||||||
search_dossier,
|
|
||||||
)
|
|
||||||
|
|
||||||
from pif_compiler.services.echa_process import (
|
|
||||||
echaExtract,
|
|
||||||
echaExtract_multi,
|
|
||||||
echaExtract_specific,
|
|
||||||
echaExtract_local,
|
|
||||||
echa_noael_ld50,
|
|
||||||
echa_noael_ld50_multi,
|
|
||||||
echaPage_to_md,
|
|
||||||
openEchaPage,
|
|
||||||
markdown_to_json_raw,
|
|
||||||
clean_json,
|
|
||||||
json_to_dataframe,
|
|
||||||
filter_dataframe_by_dict,
|
|
||||||
)
|
|
||||||
|
|
||||||
from pif_compiler.services.echa_pdf import (
|
|
||||||
generate_pdf_with_header_and_cleanup,
|
|
||||||
search_generate_pdfs,
|
|
||||||
svg_to_data_uri,
|
|
||||||
)
|
|
||||||
|
|
||||||
# COSING Service
|
# COSING Service
|
||||||
from pif_compiler.services.cosing_service import (
|
from pif_compiler.services.cosing_service import (
|
||||||
|
|
@ -62,25 +38,6 @@ from pif_compiler.services.db_utils import get_client
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
# ECHA Find
|
|
||||||
"search_dossier",
|
|
||||||
# ECHA Process
|
|
||||||
"echaExtract",
|
|
||||||
"echaExtract_multi",
|
|
||||||
"echaExtract_specific",
|
|
||||||
"echaExtract_local",
|
|
||||||
"echa_noael_ld50",
|
|
||||||
"echa_noael_ld50_multi",
|
|
||||||
"echaPage_to_md",
|
|
||||||
"openEchaPage",
|
|
||||||
"markdown_to_json_raw",
|
|
||||||
"clean_json",
|
|
||||||
"json_to_dataframe",
|
|
||||||
"filter_dataframe_by_dict",
|
|
||||||
# ECHA PDF
|
|
||||||
"generate_pdf_with_header_and_cleanup",
|
|
||||||
"search_generate_pdfs",
|
|
||||||
"svg_to_data_uri",
|
|
||||||
# COSING Service
|
# COSING Service
|
||||||
"cosing_search",
|
"cosing_search",
|
||||||
"clean_cosing",
|
"clean_cosing",
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
import os
|
import os
|
||||||
|
from urllib.parse import quote_plus
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from pymongo import MongoClient
|
from pymongo import MongoClient
|
||||||
|
|
@ -15,6 +16,9 @@ def get_client():
|
||||||
MONGO_HOST = os.getenv("MONGO_HOST")
|
MONGO_HOST = os.getenv("MONGO_HOST")
|
||||||
MONGO_PORT = os.getenv("MONGO_PORT")
|
MONGO_PORT = os.getenv("MONGO_PORT")
|
||||||
|
|
||||||
|
MONGO_PORT = MONGO_PORT
|
||||||
|
ADMIN_PASSWORD = quote_plus(ADMIN_PASSWORD)
|
||||||
|
|
||||||
client = MongoClient(
|
client = MongoClient(
|
||||||
f"mongodb://{ADMIN_USER}:{ADMIN_PASSWORD}@{MONGO_HOST}:{MONGO_PORT}/?authSource=admin",
|
f"mongodb://{ADMIN_USER}:{ADMIN_PASSWORD}@{MONGO_HOST}:{MONGO_PORT}/?authSource=admin",
|
||||||
serverSelectionTimeoutMS=5000
|
serverSelectionTimeoutMS=5000
|
||||||
|
|
@ -34,4 +38,11 @@ def db_connect(db_name : str = 'toxinfo', collection_name : str = 'substance_ind
|
||||||
logger.error(f"Error connecting to MongoDB: {e}")
|
logger.error(f"Error connecting to MongoDB: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return client, db, collection
|
return collection
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual connectivity check: ask db_connect() for its default collection
    # and log whether a handle came back.
    coll = db_connect()
    # db_connect() returns None when the connection attempt fails. Compare
    # against None explicitly: the returned object is presumably a pymongo
    # Collection, which raises NotImplementedError on truth-value testing
    # (`if coll:`) — confirm against db_connect's full body.
    if coll is not None:
        logger.info("Database connection successful.")
    else:
        logger.error("Database connection failed.")
|
||||||
|
|
@ -46,8 +46,9 @@ def search_substance(cas : str) -> dict:
|
||||||
"rmlName": result["substanceIndex"]["rmlName"],
|
"rmlName": result["substanceIndex"]["rmlName"],
|
||||||
"rmlId": result["substanceIndex"]["rmlId"]
|
"rmlId": result["substanceIndex"]["rmlId"]
|
||||||
}
|
}
|
||||||
|
log.info(f"Substance found for CAS {cas}: {substance['rmlName']}")
|
||||||
return substance
|
return substance
|
||||||
log.error(f"Something went wrong")
|
log.error(f"Something went wrong searching the substance for CAS {cas}")
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -69,6 +70,7 @@ def get_dossier_info(rmlId: str) -> dict:
|
||||||
"assetExternalId": response_dossier_json['items'][0]['assetExternalId'],
|
"assetExternalId": response_dossier_json['items'][0]['assetExternalId'],
|
||||||
"rootKey": response_dossier_json['items'][0]['rootKey']
|
"rootKey": response_dossier_json['items'][0]['rootKey']
|
||||||
}
|
}
|
||||||
|
log.info(f"Dossier info retrieved for RML ID {rmlId}")
|
||||||
return dossier_info
|
return dossier_info
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -105,6 +107,8 @@ def get_substance_index(assetExternalId : str) -> dict:
|
||||||
at_href = at_link['href']
|
at_href = at_link['href']
|
||||||
index_data['acute_toxicity_link'] = LINK_DOSSIER + at_href + '.html'
|
index_data['acute_toxicity_link'] = LINK_DOSSIER + at_href + '.html'
|
||||||
|
|
||||||
|
log.info(f"Substance index retrieved for Asset External ID {assetExternalId}")
|
||||||
|
|
||||||
return index_data
|
return index_data
|
||||||
|
|
||||||
#endregion
|
#endregion
|
||||||
|
|
@ -303,6 +307,12 @@ def generate_pdf_from_toxicology_info(index: dict):
|
||||||
page.goto(index['toxicological_information_link'])
|
page.goto(index['toxicological_information_link'])
|
||||||
page.pdf(path=f'pdfs/{index["substance"]["rmlCas"]}.pdf')
|
page.pdf(path=f'pdfs/{index["substance"]["rmlCas"]}.pdf')
|
||||||
browser.close()
|
browser.close()
|
||||||
|
if os.path.exists(f'pdfs/{index["substance"]["rmlCas"]}.pdf'):
|
||||||
|
log.info(f"PDF generated for CAS {index['substance']['rmlCas']}")
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
log.error(f"PDF generation failed for CAS {index['substance']['rmlCas']}")
|
||||||
|
return False
|
||||||
|
|
||||||
#endregion
|
#endregion
|
||||||
|
|
||||||
|
|
@ -326,6 +336,8 @@ def echa_flow(cas) -> dict:
|
||||||
"repeated_dose_toxicity": {}
|
"repeated_dose_toxicity": {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.debug(f"ECHA flow intermediate result")
|
||||||
|
|
||||||
# Fetch and parse toxicological information
|
# Fetch and parse toxicological information
|
||||||
txi_link = index.get('toxicological_information_link')
|
txi_link = index.get('toxicological_information_link')
|
||||||
if txi_link:
|
if txi_link:
|
||||||
|
|
@ -349,7 +361,9 @@ def echa_flow(cas) -> dict:
|
||||||
|
|
||||||
for key, value in result.items():
|
for key, value in result.items():
|
||||||
if value is None or value == "" or value == [] or value == {}:
|
if value is None or value == "" or value == [] or value == {}:
|
||||||
return False
|
log.warning(f"Missing data for key: {key} in CAS {cas}")
|
||||||
|
else:
|
||||||
|
log.info(f"Data retrieved for key: {key} in CAS {cas}")
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def cas_validation(cas: str) -> str:
|
def cas_validation(cas: str) -> str:
|
||||||
|
|
@ -367,9 +381,9 @@ def cas_validation(cas: str) -> str:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def check_local(cas: str) -> bool:
|
def check_local(cas: str) -> bool:
|
||||||
client, db, collection = db_connect()
|
collection = db_connect()
|
||||||
|
|
||||||
if not collection:
|
if collection is None:
|
||||||
log.error("No MongoDB collection available.")
|
log.error("No MongoDB collection available.")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
@ -383,9 +397,9 @@ def check_local(cas: str) -> bool:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def add_to_local(data: dict) -> bool:
|
def add_to_local(data: dict) -> bool:
|
||||||
client, db, collection = db_connect()
|
collection = db_connect()
|
||||||
|
|
||||||
if not collection:
|
if collection is None:
|
||||||
log.error("No MongoDB collection available.")
|
log.error("No MongoDB collection available.")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
@ -397,17 +411,22 @@ def add_to_local(data: dict) -> bool:
|
||||||
log.error(f"Error inserting data into MongoDB: {e}")
|
log.error(f"Error inserting data into MongoDB: {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def search_substance(cas: str) -> dict:
|
def orchestrator(cas: str) -> dict:
|
||||||
|
log.debug(f"Initiating search for CAS {cas} in ECHA service.")
|
||||||
cas_validated = cas_validation(cas)
|
cas_validated = cas_validation(cas)
|
||||||
if not cas_validated:
|
if not cas_validated:
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
|
log.info(f"CAS {cas} validated successfully.")
|
||||||
local_record = check_local(cas_validated)
|
local_record = check_local(cas_validated)
|
||||||
if local_record:
|
if local_record:
|
||||||
|
log.info(f"Returning local record for CAS {cas}.")
|
||||||
return local_record
|
return local_record
|
||||||
else:
|
else:
|
||||||
|
log.info(f"No local record, starting echa flow")
|
||||||
echa_data = echa_flow(cas_validated)
|
echa_data = echa_flow(cas_validated)
|
||||||
if echa_data:
|
if echa_data:
|
||||||
|
log.info(f"Echa flow successful")
|
||||||
add_to_local(echa_data)
|
add_to_local(echa_data)
|
||||||
return echa_data
|
return echa_data
|
||||||
else:
|
else:
|
||||||
|
|
@ -417,3 +436,8 @@ def search_substance(cas: str) -> dict:
|
||||||
# to do: check if document is complete
|
# to do: check if document is complete
|
||||||
# to do: check lastupdate
|
# to do: check lastupdate
|
||||||
#endregion
|
#endregion
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Ad-hoc manual run: send one sample CAS number through orchestrator()
    # and print whatever it returns.
    print(orchestrator("50-00-0"))
|
||||||
Loading…
Reference in a new issue