# PDF generation utilities for ECHA substance dossier pages.
import os
|
|
import base64
|
|
import traceback
|
|
import logging # Import logging module
|
|
import datetime
|
|
import pandas as pd
|
|
# import time # Keep if you use page.wait_for_timeout
|
|
from playwright.sync_api import sync_playwright, TimeoutError # Catch specific errors
|
|
from src.func.find import search_dossier
|
|
import requests
|
|
|
|
# --- Basic Logging Setup (Commented Out) ---
|
|
# # Configure logging - uncomment and customize level/handler as needed
|
|
# logging.basicConfig(
|
|
# level=logging.INFO, # Or DEBUG for more details
|
|
# format='%(asctime)s - %(levelname)s - %(message)s',
|
|
# # filename='pdf_generator.log', # Optional: Log to a file
|
|
# # filemode='a'
|
|
# )
|
|
# --- End Logging Setup ---
|
|
|
|
|
|
# Assume svg_to_data_uri is defined elsewhere correctly
|
|
def svg_to_data_uri(svg_path):
    """Encode an SVG file as a base64 ``data:image/svg+xml`` URI.

    Used to inline logo images into the injected PDF header so the
    header template has no external file dependencies.

    Args:
        svg_path (str): Path to the SVG file on disk.

    Returns:
        str | None: The data URI string, or None if the file is missing
        or cannot be read/encoded (the error is reported to stdout).
    """
    try:
        if not os.path.exists(svg_path):
            raise FileNotFoundError(f"SVG file not found: {svg_path}")
        with open(svg_path, 'rb') as svg_file:
            raw_bytes = svg_file.read()
        encoded = base64.b64encode(raw_bytes).decode('utf-8')
        return f"data:image/svg+xml;base64,{encoded}"
    except Exception as e:
        # Best-effort helper: report and signal failure with None so the
        # caller decides whether to abort.
        print(f"Error converting SVG {svg_path}: {e}")
        return None
|
|
|
|
# --- JavaScript Expressions ---
|
|
|
|
# Define the cleanup logic as an immediately-invoked arrow function expression
|
|
# NOTE: .das-block_empty removal is currently disabled as per previous step
|
|
cleanup_js_expression = """
|
|
() => {
|
|
console.log('Running cleanup JS (DISABLED .das-block_empty removal)...');
|
|
let totalRemoved = 0;
|
|
|
|
// Example 1: Remove sections explicitly marked as empty (Currently Disabled)
|
|
// const emptyBlocks = document.querySelectorAll('.das-block_empty');
|
|
// emptyBlocks.forEach(el => {
|
|
// if (el && el.parentNode) {
|
|
// console.log(`Removing '.das-block_empty' block with ID: ${el.id || 'N/A'}`);
|
|
// el.remove();
|
|
// totalRemoved++;
|
|
// }
|
|
// });
|
|
|
|
// Add other specific cleanup logic here if needed
|
|
|
|
console.log(`Cleanup script removed ${totalRemoved} elements (DISABLED .das-block_empty removal).`);
|
|
return totalRemoved; // Return the count
|
|
}
|
|
"""
|
|
# --- End JavaScript Expressions ---
|
|
|
|
|
|
def generate_pdf_with_header_and_cleanup(
    url,
    pdf_path,
    substance_name,
    substance_link,
    ec_number,
    cas_number,
    header_template_path=r"src\func\resources\injectableHeader.html",
    echa_chem_logo_path=r"src\func\resources\echa_chem_logo.svg",
    echa_logo_path=r"src\func\resources\ECHA_Logo.svg"
) -> bool:
    """
    Generates a PDF with a dynamic header and optionally removes empty sections.

    The page at `url` is loaded in headless Chromium, the header template
    (with logos inlined and placeholders filled in) is injected at the top
    of <body>, print-friendly CSS overrides are applied, the cleanup
    JavaScript is run, and the result is printed to `pdf_path`.

    Args:
        url (str): The target URL OR local HTML file path.
        pdf_path (str): The output PDF path.
        substance_name (str): The name of the chemical substance.
        substance_link (str): The URL the substance name should link to (in header).
        ec_number (str): The EC number for the substance.
        cas_number (str): The CAS number for the substance.
        header_template_path (str): Path to the HTML header template file.
        echa_chem_logo_path (str): Path to the echa_chem_logo.svg file.
        echa_logo_path (str): Path to the ECHA_Logo.svg file.

    Returns:
        bool: True if the PDF was generated successfully, False otherwise.
    """
    final_header_html = None

    # --- 1. Prepare Header HTML ---
    try:
        print(f"Reading header template from: {header_template_path}")
        if not os.path.exists(header_template_path):
            raise FileNotFoundError(f"Header template file not found: {header_template_path}")
        with open(header_template_path, 'r', encoding='utf-8') as f:
            header_template_content = f.read()
        if not header_template_content:
            raise ValueError("Header template file is empty.")

        print("Converting logos...")
        logo1_data_uri = svg_to_data_uri(echa_chem_logo_path)
        logo2_data_uri = svg_to_data_uri(echa_logo_path)
        if not logo1_data_uri or not logo2_data_uri:
            raise ValueError("Failed to convert one or both logos to Data URIs.")

        print("Replacing placeholders...")
        # Data-driven substitution avoids a repetitive .replace() chain.
        replacements = {
            "##ECHA_CHEM_LOGO_SRC##": logo1_data_uri,
            "##ECHA_LOGO_SRC##": logo2_data_uri,
            "##SUBSTANCE_NAME##": substance_name,
            "##SUBSTANCE_LINK##": substance_link,
            "##EC_NUMBER##": ec_number,
            "##CAS_NUMBER##": cas_number,
        }
        final_header_html = header_template_content
        for placeholder, value in replacements.items():
            final_header_html = final_header_html.replace(placeholder, value)

        # Any leftover '##' suggests a template placeholder we did not fill.
        if "##" in final_header_html:
            print("Warning: Not all placeholders seem replaced in the header HTML.")

    except Exception as e:
        print(f"Error during header setup phase: {e}")
        traceback.print_exc()
        return False  # Header setup failure: nothing sensible to render.
    # --- End Header Prep ---

    # --- CSS Override Definition ---
    # Force scrollable/clipped containers to expand so their full content is
    # paginated, and keep the injected header glued to the document start.
    selectors_to_fix = [
        '.das-field .das-field_value_html',
        '.das-field .das-field_value_large',
        '.das-field .das-value_remark-text'
    ]
    css_selector_string = ",\n".join(selectors_to_fix)
    css_override = f"""
    <style id='pdf-override-styles'>
    /* Basic Resets & Overflows */
    html, body {{ height: auto !important; overflow: visible !important; margin: 0 !important; padding: 0 !important; }}
    * {{ box-sizing: border-box; }}
    {css_selector_string} {{
        overflow: visible !important; overflow-y: visible !important; height: auto !important; max-height: none !important;
    }}
    /* Boundary Fix */
    #pdf-custom-header {{ margin-bottom: 0 !important; padding-bottom: 1px !important; page-break-after: auto !important; display: block !important; }}
    #pdf-custom-header + .body-inner {{ margin-top: 0 !important; padding-top: 0 !important; page-break-before: auto !important; display: block !important; }}
    .body-inner .document-header {{ margin-top: 0 !important; padding-top: 0 !important; page-break-before: auto !important; }}
    /* Simplified Page Breaks */
    .body-inner h1, .body-inner h2, .body-inner h3, .body-inner h4, .body-inner h5, .body-inner h6 {{ page-break-after: avoid !important; }}
    #pdf-custom-header h2 {{ page-break-after: auto !important; }}
    @media print {{
        html, body {{ height: auto !important; overflow: visible !important; margin: 0; padding: 0; }}
        #pdf-custom-header {{ margin-bottom: 0 !important; padding-bottom: 1px !important; page-break-after: auto !important; display: block !important;}}
        #pdf-custom-header + .body-inner {{ margin-top: 0 !important; padding-top: 0 !important; page-break-before: auto !important; display: block !important; }}
        .body-inner .document-header {{ margin-top: 0 !important; padding-top: 0 !important; page-break-before: auto !important; }}
        .body-inner h1, .body-inner h2, .body-inner h3, .body-inner h4, .body-inner h5, .body-inner h6 {{ page-break-after: avoid !important; }}
        #pdf-custom-header h2 {{ page-break-after: auto !important; }}
        .das-doc-toolbar, .document-header__section-links, #das-totop {{ display: none !important; }}
    }}
    </style>
    """
    # --- End CSS Override Definition ---

    # --- Playwright Automation ---
    try:
        with sync_playwright() as p:
            # browser = p.chromium.launch(headless=False, devtools=True)  # For debugging
            browser = p.chromium.launch()
            try:
                page = browser.new_page()
                # Surface in-page console output (incl. the cleanup script's
                # logging) on our stdout.
                page.on("console", lambda msg: print(f"Browser Console: {msg.text}"))

                try:
                    print(f"Navigating to: {url}")
                    # A bare filesystem path must become a file:// URL before
                    # Chromium will load it.
                    if os.path.exists(url) and not url.startswith('file://'):
                        page_url = f'file://{os.path.abspath(url)}'
                        print(f"Treating as local file: {page_url}")
                    else:
                        page_url = url

                    page.goto(page_url, wait_until='load', timeout=90000)

                    print("Injecting header HTML...")
                    # Plain strings here: there is nothing to interpolate, so
                    # no f-prefix (which forced '{{ }}' escaping before).
                    page.evaluate(
                        '(headerHtml) => { document.body.insertAdjacentHTML("afterbegin", headerHtml); }',
                        final_header_html
                    )

                    print("Injecting CSS overrides...")
                    page.evaluate(
                        """(css) => {
                            const existingStyle = document.getElementById('pdf-override-styles');
                            if (existingStyle) existingStyle.remove();
                            document.head.insertAdjacentHTML('beforeend', css);
                        }""",
                        css_override
                    )

                    print("Running JavaScript cleanup function...")
                    elements_removed_count = page.evaluate(cleanup_js_expression)
                    print(f"Cleanup script finished (reported removing {elements_removed_count} elements).")

                    # --- Optional: Emulate Print Media ---
                    # page.emulate_media(media='print')

                    # --- Generate PDF ---
                    print(f"Generating PDF: {pdf_path}")
                    pdf_options = {
                        "path": pdf_path, "format": "A4", "print_background": True,
                        "margin": {'top': '20px', 'bottom': '20px', 'left': '20px', 'right': '20px'},
                        "scale": 1.0
                    }
                    page.pdf(**pdf_options)
                    print(f"PDF saved successfully to: {pdf_path}")
                    return True  # Indicate success

                except TimeoutError as e:
                    print(f"A Playwright TimeoutError occurred: {e}")
                    traceback.print_exc()
                    return False  # Indicate failure
                except Exception as e:
                    # Other errors during page operations.
                    print(f"An unexpected error occurred during Playwright page operations: {e}")
                    traceback.print_exc()
                    # Best effort: dump the page's current HTML for debugging.
                    try:
                        html_content = page.content()
                        # splitext is robust even if '.pdf' appears mid-path
                        # (str.replace would hit the first occurrence anywhere).
                        error_html_path = os.path.splitext(pdf_path)[0] + '_error.html'
                        with open(error_html_path, 'w', encoding='utf-8') as f_err:
                            f_err.write(html_content)
                        print(f"Saved HTML state on error to: {error_html_path}")
                    except Exception as save_e:
                        print(f"Could not save HTML state on error: {save_e}")
                    return False  # Indicate failure
            finally:
                # Guarantee the browser is closed on every path (success,
                # timeout, or unexpected error).
                print("Closing browser.")
                browser.close()

    except Exception as e:
        # Catch errors during Playwright startup/teardown (less common).
        print(f"An error occurred during Playwright setup/teardown: {e}")
        traceback.print_exc()
        return False  # Indicate failure
|
|
|
|
|
|
# --- Example Usage ---
|
|
# result = generate_pdf_with_header_and_cleanup(
|
|
# url='path/to/your/input.html',
|
|
# pdf_path='output.pdf',
|
|
# substance_name='Glycerol Example',
|
|
# substance_link='http://example.com/glycerol',
|
|
# ec_number='200-289-5',
|
|
# cas_number='56-81-5',
|
|
# )
|
|
#
|
|
# if result:
|
|
# print("PDF Generation Succeeded.")
|
|
# # logging.info("Main script: PDF Generation Succeeded.") # Example logging
|
|
# else:
|
|
# print("PDF Generation Failed.")
|
|
# # logging.error("Main script: PDF Generation Failed.") # Example logging
|
|
|
|
|
|
def _fetch_and_save_html(page_name: str, raw_html_url: str, html_full_path: str) -> bool:
    """
    Download the raw HTML for one page type and write it to disk.

    Errors are reported to stdout but never raised, so a failed HTML
    snapshot does not prevent the caller from attempting PDF generation.

    Args:
        page_name (str): Page type name, used only in log messages.
        raw_html_url (str): URL to fetch.
        html_full_path (str): Destination file path for the HTML.

    Returns:
        bool: True if the HTML was fetched and written successfully.
    """
    try:
        print(f"Fetching raw HTML from: {raw_html_url}")
        # Mimic a regular browser; some servers reject default client UAs.
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
        response = requests.get(raw_html_url, timeout=30, headers=headers)  # 30s timeout
        response.raise_for_status()  # Raise for 4xx/5xx status codes

        # Decode explicitly as UTF-8; errors='replace' avoids hard failures
        # on stray bytes at the cost of substitution characters.
        html_content = response.content.decode('utf-8', errors='replace')

        with open(html_full_path, 'w', encoding='utf-8') as f:
            f.write(html_content)
        print(f"Successfully saved raw HTML to: {html_full_path}")
        return True
    except requests.exceptions.RequestException as req_e:
        print(f"Error fetching raw HTML for {page_name} from {raw_html_url}: {req_e}")
    except IOError as io_e:
        print(f"Error saving raw HTML for {page_name} to {html_full_path}: {io_e}")
    except Exception as e:  # e.g. unexpected decoding problems
        print(f"Unexpected error saving HTML for {page_name}: {e}")
    return False


def search_generate_pdfs(
    cas_number_to_search: str,
    page_types_to_extract: list[str],
    base_output_folder: str = "data/library"
) -> bool:
    """
    Searches for a substance by CAS, saves raw HTML and generates PDFs for
    specified page types. Uses '_js' link variant for the PDF header link if available.

    Args:
        cas_number_to_search (str): CAS number to search for.
        page_types_to_extract (list[str]): List of page type names (e.g., 'RepeatedDose').
            Expects '{page_type}' and '{page_type}_js' keys in the search result.
        base_output_folder (str): Root directory for saving HTML/PDFs.

    Returns:
        bool: True if substance found and >=1 requested PDF generated, False otherwise.
    """
    print(f"\n===== Processing request for CAS: {cas_number_to_search} =====")

    # --- 1. Search for Dossier Information ---
    try:
        search_result = search_dossier(substance=cas_number_to_search, input_type='rmlCas')
    except Exception as e:
        print(f"Error during dossier search for CAS '{cas_number_to_search}': {e}")
        traceback.print_exc()
        return False

    if not search_result:
        print(f"Substance not found or search failed for CAS: {cas_number_to_search}")
        return False

    print(f"Substance found: {search_result.get('rmlName', 'N/A')}")

    # --- 2. Extract Details and Filter Pages ---
    try:
        rml_id = search_result.get('rmlId')
        rml_name = search_result.get('rmlName')
        rml_cas = search_result.get('rmlCas')
        rml_ec = search_result.get('rmlEc')
        asset_ext_id = search_result.get('assetExternalId')

        # All of these must be present and truthy; they feed folder naming
        # and the PDF header. (asset_ext_id is validated for completeness
        # but not otherwise used here.)
        required = {'rmlId': rml_id, 'rmlName': rml_name, 'rmlCas': rml_cas,
                    'rmlEc': rml_ec, 'assetExternalId': asset_ext_id}
        missing_keys = [k for k, v in required.items() if not v]
        if missing_keys:
            print(f"Error: Search result for {cas_number_to_search} is missing required keys: {missing_keys}")
            return False

        # Collect (page_name, raw_url, js_url) triples. Only page types with
        # BOTH a valid raw URL (PDF content source) and a valid '_js' URL
        # (header hyperlink) are processed.
        pages_to_process_list = []
        for page_type in page_types_to_extract:
            raw_url = search_result.get(page_type)
            js_url = search_result.get(f"{page_type}_js")

            if raw_url and isinstance(raw_url, str) and raw_url.strip():
                if js_url and isinstance(js_url, str) and js_url.strip():
                    pages_to_process_list.append((page_type, raw_url, js_url))
                else:
                    # Raw URL present but no usable JS URL: skip rather than
                    # fall back, so the header link is never wrong.
                    print(f"Found raw URL for '{page_type}' but missing/invalid JS URL ('{js_url}'). Skipping PDF generation for this type.")
            elif page_type in search_result:
                print(f"Found page type key '{page_type}' for {rml_cas}, but its value is not a valid URL ('{raw_url}'). Skipping.")
            else:
                print(f"Requested page type key '{page_type}' not found in search results for {rml_cas}.")

        if not pages_to_process_list:
            print(f"After filtering, no requested page types ({page_types_to_extract}) resulted in a valid pair of Raw and JS URLs for substance {rml_cas}.")
            return False  # Nothing to generate

    except Exception as e:
        print(f"Error processing search result for '{cas_number_to_search}': {e}")
        traceback.print_exc()
        return False

    # --- 3. Prepare Folders ---
    # Sanitize path separators out of BOTH identifiers used in the folder
    # name (previously only the CAS was sanitized).
    safe_cas = rml_cas.replace('/', '_').replace('\\', '_')
    safe_ec = str(rml_ec).replace('/', '_').replace('\\', '_')
    substance_folder_path = os.path.join(base_output_folder, f"{safe_cas}_{safe_ec}_{rml_id}")

    try:
        os.makedirs(substance_folder_path, exist_ok=True)
        print(f"Ensured output directory exists: {substance_folder_path}")
    except OSError as e:
        print(f"Error creating directory {substance_folder_path}: {e}")
        return False

    # --- 4. Process Each Page (Save HTML, Generate PDF) ---
    successful_pages = []  # Page types whose PDF generation succeeded

    for page_name, raw_html_url, js_header_link in pages_to_process_list:
        print(f"\nProcessing page: {page_name}")
        base_filename = f"{safe_cas}_{page_name}"
        html_full_path = os.path.join(substance_folder_path, f"{base_filename}.html")
        pdf_full_path = os.path.join(substance_folder_path, f"{base_filename}.pdf")

        # Save the raw HTML snapshot. Best effort: failure is logged by the
        # helper and does not affect the overall return value.
        _fetch_and_save_html(page_name, raw_html_url, html_full_path)

        # Generate the PDF: the raw URL supplies the content, the JS URL is
        # used as the clickable substance link in the injected header.
        print(f"Generating PDF using content from: {raw_html_url}")
        pdf_success = generate_pdf_with_header_and_cleanup(
            url=raw_html_url,
            pdf_path=pdf_full_path,
            substance_name=rml_name,
            substance_link=js_header_link,
            ec_number=rml_ec,
            cas_number=rml_cas
        )

        if pdf_success:
            successful_pages.append(page_name)
            print(f"Successfully generated PDF for {page_name}")
        else:
            print(f"Failed to generate PDF for {page_name}")

    print(f"===== Finished request for CAS: {cas_number_to_search} =====")
    print(f"Successfully generated {len(successful_pages)} PDFs: {successful_pages}")
    # Success means at least one requested PDF was generated.
    return bool(successful_pages)