update: new endpoint for the api (cosing, pubchem, download)
This commit is contained in:
parent
5fd12cb7a7
commit
f04d4f8b3e
8 changed files with 370 additions and 158 deletions
222
src/pif_compiler/api/routes/common.py
Normal file
222
src/pif_compiler/api/routes/common.py
Normal file
|
|
@ -0,0 +1,222 @@
|
||||||
|
from fastapi import APIRouter, HTTPException, status
|
||||||
|
from fastapi.responses import FileResponse
|
||||||
|
from pydantic import BaseModel, Field, HttpUrl
|
||||||
|
from typing import Optional, Dict, Any
|
||||||
|
import os
|
||||||
|
|
||||||
|
from pif_compiler.functions.common_func import generate_pdf
|
||||||
|
from pif_compiler.services.srv_pubchem import pubchem_dap
|
||||||
|
from pif_compiler.functions.common_log import get_logger
|
||||||
|
|
||||||
|
logger = get_logger()
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
class GeneratePdfRequest(BaseModel):
    # Request body for the PDF generation endpoint. Both fields are
    # client-controlled, so they are constrained here before they reach the
    # headless browser or the filesystem.

    # Only http(s) URLs are accepted: the link is handed to a browser, and
    # other schemes (e.g. file://) would let a caller render server-local
    # resources into the PDF.
    link: str = Field(
        ...,
        pattern=r"^[hH][tT][tT][pP][sS]?://",
        description="URL of the page to convert to PDF",
    )
    # The name is interpolated into 'pdfs/{name}.pdf'; forbidding path
    # separators keeps the generated file inside the 'pdfs/' directory.
    name: str = Field(
        ...,
        min_length=1,
        pattern=r"^[^/\\]+$",
        description="Name for the generated PDF file (without extension)",
    )

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {
                "link": "https://example.com/page",
                "name": "my_document"
            }
        }
|
||||||
|
|
||||||
|
|
||||||
|
class GeneratePdfResponse(BaseModel):
    # Response body returned by the PDF generation endpoint.

    # Outcome flag for the generation request.
    success: bool
    # Name that was requested for the PDF (without extension).
    name: str
    # Human-readable description of the outcome.
    message: str
    # Relative path of the PDF; stays None when no file is available.
    file_path: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/common/generate-pdf", response_model=GeneratePdfResponse, tags=["Common"])
async def generate_pdf_endpoint(request: GeneratePdfRequest):
    """
    Generate a PDF from a web page URL.

    This endpoint uses Playwright to:
    1. Navigate to the provided URL
    2. Render the page
    3. Generate a PDF file
    4. Save it in the 'pdfs/' directory

    If a PDF with the same name already exists, it will skip generation
    and return success immediately.

    Args:
        request: GeneratePdfRequest with the URL and desired PDF name

    Returns:
        GeneratePdfResponse with success status and file information

    Raises:
        HTTPException: 500 when an unexpected error occurs during generation
    """
    # Local import keeps this block self-contained with respect to the
    # file's import section.
    from fastapi.concurrency import run_in_threadpool

    logger.info(f"API request received to generate PDF: name='{request.name}', link='{request.link}'")

    try:
        # generate_pdf uses Playwright's synchronous API, which must not run
        # on the thread that owns the asyncio event loop. Running it in a
        # worker thread also keeps the loop free while the page renders.
        result = await run_in_threadpool(generate_pdf, request.link, request.name)

        if not result:
            logger.error(f"PDF generation failed for '{request.name}'")
            return GeneratePdfResponse(
                success=False,
                name=request.name,
                message="PDF generation failed",
                file_path=None
            )

        file_path = f"pdfs/{request.name}.pdf"

        # Double-check that the file is really on disk before reporting success.
        if not os.path.exists(file_path):
            logger.error(f"PDF file not found after generation for '{request.name}'")
            return GeneratePdfResponse(
                success=False,
                name=request.name,
                message="PDF generation completed but file not found",
                file_path=None
            )

        logger.info(f"PDF available for '{request.name}'")
        return GeneratePdfResponse(
            success=True,
            name=request.name,
            message="PDF generated successfully or already exists",
            file_path=file_path
        )

    except Exception as e:
        logger.error(f"Error generating PDF for '{request.name}': {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Internal error while generating PDF: {str(e)}"
        )
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/common/download-pdf/{name}", response_class=FileResponse, tags=["Common"])
async def download_pdf(name: str):
    """
    Download a previously generated PDF file.

    Args:
        name: Name of the PDF file (without extension)

    Returns:
        FileResponse with the PDF file for download

    Raises:
        HTTPException: 400 when the name contains path separators,
            404 when no PDF with that name exists
    """
    logger.info(f"API request received to download PDF: name='{name}'")

    # The name comes straight from the URL and is interpolated into a
    # filesystem path; refuse anything that could step outside 'pdfs/'.
    if any(forbidden in name for forbidden in ("/", "\\", "\x00")):
        logger.warning(f"Rejected PDF download with invalid name: {name!r}")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid PDF name"
        )

    file_path = f"pdfs/{name}.pdf"

    if not os.path.exists(file_path):
        logger.warning(f"PDF file not found: {file_path}")
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"PDF file '{name}' not found. Please generate it first using /common/generate-pdf"
        )

    logger.info(f"Serving PDF file: {file_path}")
    return FileResponse(
        path=file_path,
        media_type="application/pdf",
        filename=f"{name}.pdf"
    )
|
||||||
|
|
||||||
|
|
||||||
|
class PubchemRequest(BaseModel):
    # Request body for the PubChem search endpoint.

    # Required identifier of the substance to look up.
    cas: str = Field(..., description="CAS number of the substance to search for in PubChem")

    class Config:
        # Example payload attached to the generated JSON schema
        # ("64-17-5" is the CAS number of ethanol).
        json_schema_extra = {
            "example": {
                "cas": "64-17-5"
            }
        }
|
||||||
|
|
||||||
|
|
||||||
|
class PubchemResponse(BaseModel):
    # Response body returned by the PubChem search endpoint.

    # Outcome flag for the lookup.
    success: bool
    # CAS number echoed back from the request.
    cas: str
    # Substance data; stays None when the lookup did not succeed.
    data: Optional[Dict[str, Any]] = None
    # Error description; stays None on success.
    error: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/common/pubchem", response_model=PubchemResponse, tags=["Common"])
async def search_pubchem(request: PubchemRequest):
    """
    Search for substance information in PubChem database.

    This endpoint retrieves comprehensive substance data from PubChem including:
    - **Basic info**: CID, CAS, first PubChem name, PubChem link
    - **First level properties**: XLogP, molecular weight, TPSA, exact mass
    - **Second level properties**: Melting Point, Dissociation Constants, pH

    The data is automatically cleaned and formatted for easier consumption.

    Args:
        request: PubchemRequest containing the CAS number

    Returns:
        PubchemResponse with the substance data or error information

    Raises:
        HTTPException: 500 when an unexpected error occurs during the lookup
    """
    # Local import keeps this block self-contained with respect to the
    # file's import section.
    from fastapi.concurrency import run_in_threadpool

    logger.info(f"API request received for PubChem search: CAS={request.cas}")

    try:
        # pubchem_dap is a synchronous call; run it in a worker thread so the
        # event loop keeps serving other requests while the lookup is in
        # flight.
        result = await run_in_threadpool(pubchem_dap, request.cas)

        # None signals that the lookup itself failed.
        if result is None:
            logger.error(f"PubChem search returned None for CAS: {request.cas}")
            return PubchemResponse(
                success=False,
                cas=request.cas,
                data=None,
                error="An error occurred while searching PubChem. Please check the logs for details."
            )

        # A plain string is the "no results" message produced by the service.
        if isinstance(result, str):
            logger.warning(f"No results found in PubChem for CAS: {request.cas}")
            return PubchemResponse(
                success=False,
                cas=request.cas,
                data=None,
                error=result
            )

        # Anything else is the substance data itself.
        logger.info(f"Successfully retrieved PubChem data for CAS: {request.cas}")
        return PubchemResponse(
            success=True,
            cas=request.cas,
            data=result,
            error=None
        )

    except Exception as e:
        logger.error(f"Error processing PubChem request for CAS {request.cas}: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Internal error while processing PubChem request: {str(e)}"
        )
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/common/health", tags=["Common"])
async def common_health_check():
    """
    Health check endpoint for common functions service.

    Returns the status of the common functions components.
    """
    # Every component is reported with the same static status.
    component_names = ("api", "logging", "utilities", "pubchem")
    components = dict.fromkeys(component_names, "operational")

    return {
        "status": "healthy",
        "service": "common-functions",
        "components": components,
    }
|
||||||
25
src/pif_compiler/functions/common_func.py
Normal file
25
src/pif_compiler/functions/common_func.py
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
import os
|
||||||
|
|
||||||
|
from pif_compiler.functions.common_log import get_logger
|
||||||
|
|
||||||
|
log = get_logger()
|
||||||
|
|
||||||
|
def generate_pdf(link : str, name : str):
    """Render a web page to 'pdfs/{name}.pdf' with headless Chromium.

    Args:
        link: URL of the page to render.
        name: Base name of the PDF file (without extension).

    Returns:
        True when the PDF already existed or exists after generation,
        False when the file is missing after generation.
    """
    pdf_path = f'pdfs/{name}.pdf'

    # An existing file with the same name is reused as-is.
    if os.path.exists(pdf_path):
        log.info(f"PDF already exists for {name}, skipping generation.")
        return True

    log.info(f"Generating PDF for {name} from link: {link}")
    with sync_playwright() as playwright:
        chromium = playwright.chromium.launch()
        tab = chromium.new_page()
        tab.goto(link)
        tab.pdf(path=pdf_path)
        chromium.close()

    # Confirm the outcome from the filesystem.
    created = os.path.exists(pdf_path)
    if created:
        log.info(f"PDF generated for {name}")
    else:
        log.error(f"PDF generation failed for {name}")
    return created
|
||||||
|
|
@ -8,7 +8,7 @@ import time
|
||||||
from pif_compiler.functions.common_log import get_logger
|
from pif_compiler.functions.common_log import get_logger
|
||||||
|
|
||||||
# Import dei tuoi router
|
# Import dei tuoi router
|
||||||
from pif_compiler.api.routes import api_echa
|
from pif_compiler.api.routes import api_echa, api_cosing, common
|
||||||
|
|
||||||
# Configurazione logging
|
# Configurazione logging
|
||||||
logger = get_logger()
|
logger = get_logger()
|
||||||
|
|
@ -123,6 +123,17 @@ app.include_router(
|
||||||
tags=["ECHA"]
|
tags=["ECHA"]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# COSING routes, mounted under the versioned API prefix.
app.include_router(api_cosing.router, prefix="/api/v1", tags=["COSING"])

# Common utility routes (PDF generation/download, PubChem search, health).
app.include_router(common.router, prefix="/api/v1", tags=["Common"])
|
||||||
|
|
||||||
# ==================== ROOT ENDPOINTS ====================
|
# ==================== ROOT ENDPOINTS ====================
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ from pif_compiler.services.srv_cosing import (
|
||||||
)
|
)
|
||||||
|
|
||||||
# PubChem Service
|
# PubChem Service
|
||||||
from pif_compiler.services.pubchem_service import (
|
from pif_compiler.services.srv_pubchem import (
|
||||||
pubchem_dap,
|
pubchem_dap,
|
||||||
clean_property_data,
|
clean_property_data,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -1,138 +0,0 @@
|
||||||
import os
|
|
||||||
from contextlib import contextmanager
|
|
||||||
import pubchempy as pcp
|
|
||||||
from pubchemprops.pubchemprops import get_second_layer_props
|
|
||||||
|
|
||||||
from pif_compiler.functions.common_log import get_logger
|
|
||||||
|
|
||||||
logger = get_logger()
|
|
||||||
|
|
||||||
@contextmanager
def temporary_certificate(cert_path):
    """
    Context manager that points the certificate environment variables at a
    different certificate file for the duration of the block.

    Args:
        cert_path (str): Path to the certificate file to use temporarily

    Example:
        # Regular request uses default certificates
        requests.get('https://api.example.com')

        # Use custom certificate only within this block
        with temporary_certificate('custom-cert.pem'):
            requests.get('https://api.requiring.custom.cert.com')

        # Back to default certificates
        requests.get('https://api.example.com')
    """
    env_keys = ('REQUESTS_CA_BUNDLE', 'SSL_CERT_FILE')

    # Snapshot the current values (None means "was not set").
    previous_values = {key: os.environ.get(key) for key in env_keys}

    try:
        for key in env_keys:
            os.environ[key] = cert_path
        yield
    finally:
        # Put every variable back exactly as it was found.
        for key, previous in previous_values.items():
            if previous is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = previous
|
|
||||||
|
|
||||||
def clean_property_data(api_response):
    """
    Simplifies the API response data by flattening nested structures.

    For every measurement only 'ReferenceNumber', 'Description', 'Reference'
    and 'Value' are kept, and entries whose value is empty/falsy are dropped.

    Args:
        api_response (dict): Raw API response mapping each property name to
            a list of measurement dicts

    Returns:
        dict: Cleaned data with simplified structure
    """
    cleaned_data = {}

    for property_name, measurements in api_response.items():
        cleaned_measurements = []

        for measurement in measurements:
            cleaned_measurement = {
                'ReferenceNumber': measurement.get('ReferenceNumber'),
                'Description': measurement.get('Description', ''),
            }

            # Reference may be a list (keep the first entry) or a plain
            # value; an empty list is treated as "no reference".
            if 'Reference' in measurement:
                ref = measurement['Reference']
                if isinstance(ref, list):
                    ref = ref[0] if ref else None
                cleaned_measurement['Reference'] = ref

            # Value is either a dict wrapping 'StringWithMarkup' or some
            # other payload that is stringified. A missing or empty Value is
            # skipped so it does not turn into the literal string '{}'.
            value = measurement.get('Value')
            if isinstance(value, dict) and 'StringWithMarkup' in value:
                markup = value['StringWithMarkup']
                cleaned_measurement['Value'] = markup[0].get('String', '') if markup else ''
            elif value is not None and value != {}:
                cleaned_measurement['Value'] = str(value)

            # Remove empty values
            cleaned_measurements.append(
                {key: val for key, val in cleaned_measurement.items() if val}
            )

        cleaned_data[property_name] = cleaned_measurements

    return cleaned_data
|
|
||||||
|
|
||||||
def pubchem_dap(cas):
    '''
    Given a CAS number, look up the safety-data-sheet information on PubChem.

    First level properties (synonyms, CID, logP, MolecularWeight, ExactMass,
    TPSA) are fetched with pubchempy; second level properties (Melting Point,
    Dissociation Constants, pH) are fetched with pubchemprops.

    args:
        cas : string

    returns:
        dict with the collected data, a "no results" message string when
        PubChem has no match, or None when the initial search fails
    '''
    with temporary_certificate('src/data/ncbi-nlm-nih-gov-catena.pem'):
        try:
            # Initial search
            out = pcp.get_synonyms(cas, 'name')
            if out:
                out = out[0]
                output = {'CID' : out['CID'],
                          'CAS' : cas,
                          'first_pubchem_name' : out['Synonym'][0],
                          'pubchem_link' : f"https://pubchem.ncbi.nlm.nih.gov/compound/{out['CID']}"}
            else:
                return f'No results on PubChem for {cas}'

        except Exception:
            logger.error(f'various_utils.pubchem.pubchem_dap(). Some error during pubchem search for {cas}', exc_info=True)
            # Without the initial hit there is nothing to enrich; stopping
            # here avoids reaching the final return with 'output' unbound.
            return None

        try:
            # First level properties
            properties = pcp.get_properties(['xlogp', 'molecular_weight', 'tpsa', 'exact_mass'], identifier = out['CID'], namespace='cid', searchtype=None, as_dataframe=False)
            if properties:
                output = {**output, **properties[0]}
            else:
                return output
        except Exception:
            logger.error(f'various_utils.pubchem.pubchem_dap(). Some error during pubchem first level properties extraction for {cas}', exc_info=True)

        try:
            # Second level properties (Melting Point, Dissociation Constants, pH)
            second_layer_props = get_second_layer_props(output['first_pubchem_name'], ['Melting Point', 'Dissociation Constants', 'pH'])
            if second_layer_props:
                second_layer_props = clean_property_data(second_layer_props)
                output = {**output, **second_layer_props}
        except Exception:
            logger.error(f'various_utils.pubchem.pubchem_dap(). Some error during pubchem second level properties extraction (Melting Point) for {cas}', exc_info=True)

        return output
|
|
||||||
|
|
@ -300,24 +300,6 @@ def parse_toxicology_html(html_content):
|
||||||
|
|
||||||
#endregion
|
#endregion
|
||||||
|
|
||||||
#region PDF extraction functions
|
|
||||||
|
|
||||||
def generate_pdf_from_toxicology_info(index: dict):
    """Render the toxicological information page of a substance to a PDF.

    Args:
        index: Dict providing 'toxicological_information_link' and
            index['substance']['rmlCas']; the latter names the output file
            'pdfs/{rmlCas}.pdf'.

    Returns:
        True when the PDF exists after generation, False otherwise.
    """
    with sync_playwright() as playwright:
        chromium = playwright.chromium.launch()
        tab = chromium.new_page()
        tab.goto(index['toxicological_information_link'])
        # The output path is resolved only after navigation, as before.
        pdf_path = f'pdfs/{index["substance"]["rmlCas"]}.pdf'
        tab.pdf(path=pdf_path)
        chromium.close()

    created = os.path.exists(f'pdfs/{index["substance"]["rmlCas"]}.pdf')
    if created:
        log.info(f"PDF generated for CAS {index['substance']['rmlCas']}")
    else:
        log.error(f"PDF generation failed for CAS {index['substance']['rmlCas']}")
    return created
|
|
||||||
|
|
||||||
#endregion
|
|
||||||
|
|
||||||
#region Orchestrator functions
|
#region Orchestrator functions
|
||||||
|
|
||||||
def echa_flow(cas) -> dict:
|
def echa_flow(cas) -> dict:
|
||||||
|
|
|
||||||
110
src/pif_compiler/services/srv_pubchem.py
Normal file
110
src/pif_compiler/services/srv_pubchem.py
Normal file
|
|
@ -0,0 +1,110 @@
|
||||||
|
import pubchempy as pcp
|
||||||
|
from pubchemprops.pubchemprops import get_second_layer_props
|
||||||
|
|
||||||
|
from pif_compiler.functions.common_log import get_logger
|
||||||
|
|
||||||
|
logger = get_logger()
|
||||||
|
|
||||||
|
def clean_property_data(api_response):
    """
    Simplifies the API response data by flattening nested structures.

    For every measurement only 'ReferenceNumber', 'Description', 'Reference'
    and 'Value' are kept, and entries whose value is empty/falsy are dropped.

    Args:
        api_response (dict): Raw API response mapping each property name to
            a list of measurement dicts

    Returns:
        dict: Cleaned data with simplified structure
    """
    cleaned_data = {}

    for property_name, measurements in api_response.items():
        cleaned_measurements = []

        for measurement in measurements:
            cleaned_measurement = {
                'ReferenceNumber': measurement.get('ReferenceNumber'),
                'Description': measurement.get('Description', ''),
            }

            # Reference may be a list (keep the first entry) or a plain
            # value; an empty list is treated as "no reference".
            if 'Reference' in measurement:
                ref = measurement['Reference']
                if isinstance(ref, list):
                    ref = ref[0] if ref else None
                cleaned_measurement['Reference'] = ref

            # Value is either a dict wrapping 'StringWithMarkup' or some
            # other payload that is stringified. A missing or empty Value is
            # skipped so it does not turn into the literal string '{}'.
            value = measurement.get('Value')
            if isinstance(value, dict) and 'StringWithMarkup' in value:
                markup = value['StringWithMarkup']
                cleaned_measurement['Value'] = markup[0].get('String', '') if markup else ''
            elif value is not None and value != {}:
                cleaned_measurement['Value'] = str(value)

            # Remove empty values
            cleaned_measurements.append(
                {key: val for key, val in cleaned_measurement.items() if val}
            )

        cleaned_data[property_name] = cleaned_measurements

    return cleaned_data
|
||||||
|
|
||||||
|
def pubchem_dap(cas):
    '''
    Given a CAS number, look up the safety-data-sheet information on PubChem.

    First level properties (synonyms, CID, logP, MolecularWeight, ExactMass,
    TPSA) are fetched with pubchempy; second level properties (Melting Point,
    Dissociation Constants, pH) are fetched with pubchemprops.

    args:
        cas : string

    returns:
        dict with the collected data, a "no results" message string when
        PubChem has no match, or None when the initial search fails
    '''
    # Step 1: initial search by name/CAS.
    try:
        logger.info(f"Searching PubChem for CAS: {cas}")
        out = pcp.get_synonyms(cas, 'name')
        if not out:
            logger.warning(f"No results on PubChem for {cas}")
            return f'No results on PubChem for {cas}'

        out = out[0]
        output = {
            'CID': out['CID'],
            'CAS': cas,
            'first_pubchem_name': out['Synonym'][0],
            'pubchem_link': f"https://pubchem.ncbi.nlm.nih.gov/compound/{out['CID']}",
        }
        logger.info(f"Found PubChem entry for {cas}: CID {out['CID']}")
    except Exception:
        logger.error(f'Error during pubchem search for {cas}', exc_info=True)
        return None

    # Step 2: first level properties; a failure here is logged and the
    # lookup carries on with what was collected so far.
    try:
        logger.debug(f"Fetching first level properties for CID {output['CID']}")
        first_level = pcp.get_properties(
            ['xlogp', 'molecular_weight', 'tpsa', 'exact_mass'],
            identifier=out['CID'],
            namespace='cid',
            searchtype=None,
            as_dataframe=False,
        )
        if not first_level:
            logger.warning(f"No first level properties found for {cas}")
            return output

        output.update(first_level[0])
        logger.debug(f"Successfully retrieved first level properties for {cas}")
    except Exception:
        logger.error(f'Error during pubchem first level properties extraction for {cas}', exc_info=True)

    # Step 3: second level properties (Melting Point, Dissociation
    # Constants, pH), again on a best-effort basis.
    try:
        logger.debug(f"Fetching second level properties for {output['first_pubchem_name']}")
        second_level = get_second_layer_props(
            output['first_pubchem_name'],
            ['Melting Point', 'Dissociation Constants', 'pH'],
        )
        if second_level:
            output.update(clean_property_data(second_level))
            logger.debug(f"Successfully retrieved second level properties for {cas}")
    except Exception:
        logger.error(f'Error during pubchem second level properties extraction for {cas}', exc_info=True)

    return output
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Usage example: look up ethanol by its CAS number and print the result.
    print(pubchem_dap("64-17-5"))
|
||||||
Loading…
Reference in a new issue