import logging
import os
from contextlib import contextmanager

import pubchempy as pcp
from pubchemprops.pubchemprops import get_second_layer_props

logging.basicConfig(
    format="{asctime} - {levelname} - {message}",
    style="{",
    datefmt="%Y-%m-%d %H:%M",
    filename="echa.log",
    encoding="utf-8",
    filemode="a",
    level=logging.INFO,
)


@contextmanager
def temporary_certificate(cert_path):
    """
    Context manager to temporarily change the certificate used for requests.

    This is needed because calling the PubChem API requires temporarily
    switching the certificate with which requests are made.

    The switch is done by overwriting the ``REQUESTS_CA_BUNDLE`` and
    ``SSL_CERT_FILE`` environment variables, which are process-wide; their
    previous state (including "unset") is restored on exit, even if the body
    of the ``with`` block raises.

    Args:
        cert_path (str): Path to the certificate file to use temporarily

    Example:
        # Regular request uses default certificates
        requests.get('https://api.example.com')

        # Use custom certificate only within this block
        with temporary_certificate('custom-cert.pem'):
            requests.get('https://api.requiring.custom.cert.com')

        # Back to default certificates
        requests.get('https://api.example.com')
    """
    env_vars = ('REQUESTS_CA_BUNDLE', 'SSL_CERT_FILE')

    # Remember the original values; None means the variable was not set.
    saved = {name: os.environ.get(name) for name in env_vars}

    try:
        for name in env_vars:
            os.environ[name] = cert_path
        yield
    finally:
        for name, previous in saved.items():
            if previous is None:
                # The variable did not exist before: remove it again.
                os.environ.pop(name, None)
            else:
                os.environ[name] = previous


def _extract_value(value):
    """Return the display string for a measurement's ``Value`` field ('' if empty)."""
    if isinstance(value, dict) and 'StringWithMarkup' in value:
        markup = value['StringWithMarkup']
        if not markup:
            # An empty markup list carries no text; avoid IndexError on [0].
            return ''
        first = markup[0]
        return first.get('String', '') if isinstance(first, dict) else str(first)

    # A missing/empty value must stay empty: str({}) would give the truthy
    # string '{}' and survive the empty-value filter as a bogus entry.
    if value is None or value in ('', {}, []):
        return ''
    return str(value)


def clean_property_data(api_response):
    """
    Simplifies the API response data by flattening nested structures.

    Each measurement is reduced to the keys ``ReferenceNumber``,
    ``Description``, ``Reference`` and ``Value``; keys whose value is empty
    (falsy) are dropped from the cleaned measurement.

    Args:
        api_response (dict): Raw API response mapping each property name to a
            list of measurement dicts

    Returns:
        dict: Cleaned data with simplified structure, keyed by property name
    """
    cleaned_data = {}

    for property_name, measurements in api_response.items():
        cleaned_measurements = []

        for measurement in measurements:
            cleaned = {
                'ReferenceNumber': measurement.get('ReferenceNumber'),
                'Description': measurement.get('Description', ''),
            }

            # Reference may be either a list (keep its first entry) or a
            # plain string; an empty list is treated as "no reference".
            if 'Reference' in measurement:
                ref = measurement['Reference']
                if isinstance(ref, list):
                    cleaned['Reference'] = ref[0] if ref else ''
                else:
                    cleaned['Reference'] = ref

            cleaned['Value'] = _extract_value(measurement.get('Value', {}))

            # Remove empty values
            cleaned_measurements.append({k: v for k, v in cleaned.items() if v})

        cleaned_data[property_name] = cleaned_measurements

    return cleaned_data


def pubchem_dap(cas):
    """
    Given a CAS number, look up on PubChem the information needed for the
    safety data sheet.

    First-level properties (synonyms, CID, logP, MolecularWeight, ExactMass,
    TPSA) are fetched with pubchempy; second-level properties (Melting Point,
    Dissociation Constants, pH) are fetched with pubchemprops.

    Failures in the property-extraction steps are logged and the data
    collected so far is still returned.

    Args:
        cas (str): CAS number of the substance to look up

    Returns:
        dict | str: A dict with the collected data on success. A message
        string when PubChem has no match for ``cas`` or when the initial
        search itself fails.
    """
    with temporary_certificate('src/data/ncbi-nlm-nih-gov-catena.pem'):
        # Initial search
        try:
            matches = pcp.get_synonyms(cas, 'name')
            if not matches:
                return f'No results on PubChem for {cas}'

            first_match = matches[0]
            cid = first_match['CID']
            output = {
                'CID': cid,
                'CAS': cas,
                'first_pubchem_name': first_match['Synonym'][0],
                'pubchem_link': f"https://pubchem.ncbi.nlm.nih.gov/compound/{cid}",
            }
        except Exception:
            logging.exception(
                'various_utils.pubchem.pubchem_dap(). '
                'Some error during pubchem search for %s', cas
            )
            # Without the base record nothing below can work; bail out here
            # instead of failing later with an unbound `output`.
            return f'Error during PubChem search for {cas}'

        # First-level properties
        try:
            properties = pcp.get_properties(
                ['xlogp', 'molecular_weight', 'tpsa', 'exact_mass'],
                identifier=cid,
                namespace='cid',
                searchtype=None,
                as_dataframe=False,
            )
            if not properties:
                # No first-level properties: return the base record as-is
                # (the second-level lookup is skipped in this case).
                return output
            output.update(properties[0])
        except Exception:
            logging.exception(
                'various_utils.pubchem.pubchem_dap(). '
                'Some error during pubchem first level properties extraction for %s', cas
            )

        # Second-level properties (Melting Point and others)
        try:
            second_layer_props = get_second_layer_props(
                output['first_pubchem_name'],
                ['Melting Point', 'Dissociation Constants', 'pH'],
            )
            if second_layer_props:
                output.update(clean_property_data(second_layer_props))
        except Exception:
            logging.exception(
                'various_utils.pubchem.pubchem_dap(). '
                'Some error during pubchem second level properties extraction (Melting Point) for %s', cas
            )

        return output