""" Tests for ECHA Find Service Test coverage: - search_dossier: Complete workflow for searching ECHA dossiers - Substance search (by CAS, EC, rmlName) - Dossier retrieval (Active/Inactive) - HTML parsing for toxicology sections - Error handling and edge cases """ import pytest from unittest.mock import Mock, patch, MagicMock from datetime import datetime from pif_compiler.services.echa_find import search_dossier class TestSearchDossierSubstanceSearch: """Test the initial substance search phase of search_dossier.""" @patch('pif_compiler.services.echa_find.requests.get') def test_successful_cas_search(self, mock_get): """Test successful search by CAS number.""" # Mock the substance search API response mock_response = Mock() mock_response.json.return_value = { "items": [{ "substanceIndex": { "rmlId": "100.000.001", "rmlName": "Test Substance", "rmlCas": "50-00-0", "rmlEc": "200-001-8" } }] } mock_get.return_value = mock_response # Mocking all subsequent calls with patch('pif_compiler.services.echa_find.requests.get') as mock_all_gets: # First call: substance search (already mocked above) # Second call: dossier list mock_dossier_response = Mock() mock_dossier_response.json.return_value = { "items": [{ "assetExternalId": "abc123", "rootKey": "key123", "lastUpdatedDate": "2024-01-15T10:30:00Z" }] } # Third call: index.html page mock_index_response = Mock() mock_index_response.text = """
""" mock_all_gets.side_effect = [ mock_response, mock_dossier_response, mock_index_response ] result = search_dossier("50-00-0", input_type="rmlCas") assert result is not False assert result["rmlCas"] == "50-00-0" assert result["rmlName"] == "Test Substance" assert result["rmlId"] == "100.000.001" assert result["rmlEc"] == "200-001-8" @patch('pif_compiler.services.echa_find.requests.get') def test_successful_ec_search(self, mock_get): """Test successful search by EC number.""" mock_response = Mock() mock_response.json.return_value = { "items": [{ "substanceIndex": { "rmlId": "100.000.001", "rmlName": "Test Substance", "rmlCas": "50-00-0", "rmlEc": "200-001-8" } }] } mock_get.return_value = mock_response with patch('pif_compiler.services.echa_find.requests.get') as mock_all_gets: mock_dossier_response = Mock() mock_dossier_response.json.return_value = { "items": [{ "assetExternalId": "abc123", "rootKey": "key123", "lastUpdatedDate": "2024-01-15T10:30:00Z" }] } mock_index_response = Mock() mock_index_response.text = "
" mock_all_gets.side_effect = [ mock_response, mock_dossier_response, mock_index_response ] result = search_dossier("200-001-8", input_type="rmlEc") assert result is not False assert result["rmlEc"] == "200-001-8" @patch('pif_compiler.services.echa_find.requests.get') def test_successful_name_search(self, mock_get): """Test successful search by substance name.""" mock_response = Mock() mock_response.json.return_value = { "items": [{ "substanceIndex": { "rmlId": "100.000.001", "rmlName": "formaldehyde", "rmlCas": "50-00-0", "rmlEc": "200-001-8" } }] } mock_get.return_value = mock_response with patch('pif_compiler.services.echa_find.requests.get') as mock_all_gets: mock_dossier_response = Mock() mock_dossier_response.json.return_value = { "items": [{ "assetExternalId": "abc123", "rootKey": "key123", "lastUpdatedDate": "2024-01-15T10:30:00Z" }] } mock_index_response = Mock() mock_index_response.text = "
" mock_all_gets.side_effect = [ mock_response, mock_dossier_response, mock_index_response ] result = search_dossier("formaldehyde", input_type="rmlName") assert result is not False assert result["rmlName"] == "formaldehyde" @patch('pif_compiler.services.echa_find.requests.get') def test_substance_not_found(self, mock_get): """Test when substance is not found in ECHA.""" mock_response = Mock() mock_response.json.return_value = {"items": []} mock_get.return_value = mock_response result = search_dossier("999-99-9", input_type="rmlCas") assert result is False @patch('pif_compiler.services.echa_find.requests.get') def test_empty_items_array(self, mock_get): """Test when API returns empty items array.""" mock_response = Mock() mock_response.json.return_value = {"items": []} mock_get.return_value = mock_response result = search_dossier("NONEXISTENT", input_type="rmlName") assert result is False @patch('pif_compiler.services.echa_find.requests.get') def test_malformed_api_response(self, mock_get): """Test when API response is malformed.""" mock_response = Mock() mock_response.json.return_value = {} # Missing 'items' key mock_get.return_value = mock_response result = search_dossier("50-00-0", input_type="rmlCas") assert result is False class TestSearchDossierInputTypeValidation: """Test input_type parameter validation.""" @patch('pif_compiler.services.echa_find.requests.get') def test_input_type_mismatch_cas(self, mock_get): """Test when input_type doesn't match actual search result (CAS).""" mock_response = Mock() mock_response.json.return_value = { "items": [{ "substanceIndex": { "rmlId": "100.000.001", "rmlName": "Test Substance", "rmlCas": "50-00-0", "rmlEc": "200-001-8" } }] } mock_get.return_value = mock_response # Search with CAS but specify wrong input_type result = search_dossier("50-00-0", input_type="rmlEc") assert isinstance(result, str) assert "search_error" in result assert "not equal" in result @patch('pif_compiler.services.echa_find.requests.get') def test_input_type_correct_match(self, mock_get): """Test when input_type correctly matches search result.""" mock_response = Mock() mock_response.json.return_value = { "items": [{ "substanceIndex": { "rmlId": "100.000.001", "rmlName": "Test Substance", "rmlCas": "50-00-0", "rmlEc": "200-001-8" } }] } mock_get.return_value = mock_response with patch('pif_compiler.services.echa_find.requests.get') as mock_all_gets: mock_dossier_response = Mock() mock_dossier_response.json.return_value = { "items": [{ "assetExternalId": "abc123", "rootKey": "key123", "lastUpdatedDate": "2024-01-15T10:30:00Z" }] } mock_index_response = Mock() mock_index_response.text = "
" mock_all_gets.side_effect = [ mock_response, mock_dossier_response, mock_index_response ] result = search_dossier("50-00-0", input_type="rmlCas") assert result is not False assert isinstance(result, dict) class TestSearchDossierDossierRetrieval: """Test dossier retrieval (Active/Inactive).""" @patch('pif_compiler.services.echa_find.requests.get') def test_active_dossier_found(self, mock_get): """Test when active dossier is found.""" mock_substance_response = Mock() mock_substance_response.json.return_value = { "items": [{ "substanceIndex": { "rmlId": "100.000.001", "rmlName": "Test Substance", "rmlCas": "50-00-0", "rmlEc": "200-001-8" } }] } mock_dossier_response = Mock() mock_dossier_response.json.return_value = { "items": [{ "assetExternalId": "abc123", "rootKey": "key123", "lastUpdatedDate": "2024-01-15T10:30:00Z" }] } mock_index_response = Mock() mock_index_response.text = "
" mock_get.side_effect = [ mock_substance_response, mock_dossier_response, mock_index_response ] result = search_dossier("50-00-0", input_type="rmlCas") assert result is not False assert result["dossierType"] == "Active" @patch('pif_compiler.services.echa_find.requests.get') def test_inactive_dossier_fallback(self, mock_get): """Test when only inactive dossier exists (fallback).""" mock_substance_response = Mock() mock_substance_response.json.return_value = { "items": [{ "substanceIndex": { "rmlId": "100.000.001", "rmlName": "Test Substance", "rmlCas": "50-00-0", "rmlEc": "200-001-8" } }] } # First dossier call returns empty (no active) mock_active_dossier_response = Mock() mock_active_dossier_response.json.return_value = {"items": []} # Second dossier call returns inactive mock_inactive_dossier_response = Mock() mock_inactive_dossier_response.json.return_value = { "items": [{ "assetExternalId": "abc123", "rootKey": "key123", "lastUpdatedDate": "2024-01-15T10:30:00Z" }] } mock_index_response = Mock() mock_index_response.text = "
" mock_get.side_effect = [ mock_substance_response, mock_active_dossier_response, mock_inactive_dossier_response, mock_index_response ] result = search_dossier("50-00-0", input_type="rmlCas") assert result is not False assert result["dossierType"] == "Inactive" @patch('pif_compiler.services.echa_find.requests.get') def test_no_dossiers_found(self, mock_get): """Test when no dossiers (active or inactive) are found.""" mock_substance_response = Mock() mock_substance_response.json.return_value = { "items": [{ "substanceIndex": { "rmlId": "100.000.001", "rmlName": "Test Substance", "rmlCas": "50-00-0", "rmlEc": "200-001-8" } }] } # Both active and inactive return empty mock_empty_response = Mock() mock_empty_response.json.return_value = {"items": []} mock_get.side_effect = [ mock_substance_response, mock_empty_response, # Active mock_empty_response # Inactive ] result = search_dossier("50-00-0", input_type="rmlCas") assert result is False @patch('pif_compiler.services.echa_find.requests.get') def test_last_update_date_parsed(self, mock_get): """Test that lastUpdateDate is correctly parsed.""" mock_substance_response = Mock() mock_substance_response.json.return_value = { "items": [{ "substanceIndex": { "rmlId": "100.000.001", "rmlName": "Test Substance", "rmlCas": "50-00-0", "rmlEc": "200-001-8" } }] } mock_dossier_response = Mock() mock_dossier_response.json.return_value = { "items": [{ "assetExternalId": "abc123", "rootKey": "key123", "lastUpdatedDate": "2024-01-15T10:30:00Z" }] } mock_index_response = Mock() mock_index_response.text = "
" mock_get.side_effect = [ mock_substance_response, mock_dossier_response, mock_index_response ] result = search_dossier("50-00-0", input_type="rmlCas") assert result is not False assert "lastUpdateDate" in result assert result["lastUpdateDate"] == "2024-01-15" @patch('pif_compiler.services.echa_find.requests.get') def test_missing_last_update_date(self, mock_get): """Test when lastUpdateDate is missing from response.""" mock_substance_response = Mock() mock_substance_response.json.return_value = { "items": [{ "substanceIndex": { "rmlId": "100.000.001", "rmlName": "Test Substance", "rmlCas": "50-00-0", "rmlEc": "200-001-8" } }] } mock_dossier_response = Mock() mock_dossier_response.json.return_value = { "items": [{ "assetExternalId": "abc123", "rootKey": "key123" # lastUpdatedDate missing }] } mock_index_response = Mock() mock_index_response.text = "
" mock_get.side_effect = [ mock_substance_response, mock_dossier_response, mock_index_response ] result = search_dossier("50-00-0", input_type="rmlCas") assert result is not False # Should still work, just without lastUpdateDate assert "lastUpdateDate" not in result class TestSearchDossierHTMLParsing: """Test HTML parsing for toxicology sections.""" @patch('pif_compiler.services.echa_find.requests.get') def test_all_tox_sections_found(self, mock_get): """Test when all toxicology sections are found.""" mock_substance_response = Mock() mock_substance_response.json.return_value = { "items": [{ "substanceIndex": { "rmlId": "100.000.001", "rmlName": "Test Substance", "rmlCas": "50-00-0", "rmlEc": "200-001-8" } }] } mock_dossier_response = Mock() mock_dossier_response.json.return_value = { "items": [{ "assetExternalId": "abc123", "rootKey": "key123", "lastUpdatedDate": "2024-01-15T10:30:00Z" }] } mock_index_response = Mock() mock_index_response.text = """
""" mock_get.side_effect = [ mock_substance_response, mock_dossier_response, mock_index_response ] result = search_dossier("50-00-0", input_type="rmlCas") assert result is not False assert "ToxSummary" in result assert "AcuteToxicity" in result assert "RepeatedDose" in result assert "tox_summary_001" in result["ToxSummary"] assert "acute_tox_001" in result["AcuteToxicity"] assert "repeated_dose_001" in result["RepeatedDose"] @patch('pif_compiler.services.echa_find.requests.get') def test_only_tox_summary_found(self, mock_get): """Test when only ToxSummary section exists.""" mock_substance_response = Mock() mock_substance_response.json.return_value = { "items": [{ "substanceIndex": { "rmlId": "100.000.001", "rmlName": "Test Substance", "rmlCas": "50-00-0", "rmlEc": "200-001-8" } }] } mock_dossier_response = Mock() mock_dossier_response.json.return_value = { "items": [{ "assetExternalId": "abc123", "rootKey": "key123", "lastUpdatedDate": "2024-01-15T10:30:00Z" }] } mock_index_response = Mock() mock_index_response.text = """
""" mock_get.side_effect = [ mock_substance_response, mock_dossier_response, mock_index_response ] result = search_dossier("50-00-0", input_type="rmlCas") assert result is not False assert "ToxSummary" in result assert "AcuteToxicity" not in result assert "RepeatedDose" not in result @patch('pif_compiler.services.echa_find.requests.get') def test_no_tox_sections_found(self, mock_get): """Test when no toxicology sections are found.""" mock_substance_response = Mock() mock_substance_response.json.return_value = { "items": [{ "substanceIndex": { "rmlId": "100.000.001", "rmlName": "Test Substance", "rmlCas": "50-00-0", "rmlEc": "200-001-8" } }] } mock_dossier_response = Mock() mock_dossier_response.json.return_value = { "items": [{ "assetExternalId": "abc123", "rootKey": "key123", "lastUpdatedDate": "2024-01-15T10:30:00Z" }] } mock_index_response = Mock() mock_index_response.text = "No toxicology sections" mock_get.side_effect = [ mock_substance_response, mock_dossier_response, mock_index_response ] result = search_dossier("50-00-0", input_type="rmlCas") assert result is not False assert "ToxSummary" not in result assert "AcuteToxicity" not in result assert "RepeatedDose" not in result # Basic info should still be present assert "rmlId" in result assert "index" in result @patch('pif_compiler.services.echa_find.requests.get') def test_js_links_created(self, mock_get): """Test that both HTML and JS links are created.""" mock_substance_response = Mock() mock_substance_response.json.return_value = { "items": [{ "substanceIndex": { "rmlId": "100.000.001", "rmlName": "Test Substance", "rmlCas": "50-00-0", "rmlEc": "200-001-8" } }] } mock_dossier_response = Mock() mock_dossier_response.json.return_value = { "items": [{ "assetExternalId": "abc123", "rootKey": "key123", "lastUpdatedDate": "2024-01-15T10:30:00Z" }] } mock_index_response = Mock() mock_index_response.text = """
""" mock_get.side_effect = [ mock_substance_response, mock_dossier_response, mock_index_response ] result = search_dossier("50-00-0", input_type="rmlCas") assert result is not False assert "ToxSummary" in result assert "ToxSummary_js" in result assert "AcuteToxicity" in result assert "AcuteToxicity_js" in result assert "index" in result assert "index_js" in result class TestSearchDossierURLConstruction: """Test URL construction for various endpoints.""" @patch('pif_compiler.services.echa_find.requests.get') def test_search_response_url(self, mock_get): """Test that search_response URL is correctly constructed.""" mock_substance_response = Mock() mock_substance_response.json.return_value = { "items": [{ "substanceIndex": { "rmlId": "100.000.001", "rmlName": "Test Substance", "rmlCas": "50-00-0", "rmlEc": "200-001-8" } }] } mock_dossier_response = Mock() mock_dossier_response.json.return_value = { "items": [{ "assetExternalId": "abc123", "rootKey": "key123", "lastUpdatedDate": "2024-01-15T10:30:00Z" }] } mock_index_response = Mock() mock_index_response.text = "" mock_get.side_effect = [ mock_substance_response, mock_dossier_response, mock_index_response ] result = search_dossier("50-00-0", input_type="rmlCas") assert result is not False assert "search_response" in result assert "50-00-0" in result["search_response"] assert "https://chem.echa.europa.eu/api-substance/v1/substance" in result["search_response"] @patch('pif_compiler.services.echa_find.requests.get') def test_url_encoding(self, mock_get): """Test that special characters in substance names are URL-encoded.""" substance_name = "test substance with spaces" mock_substance_response = Mock() mock_substance_response.json.return_value = { "items": [{ "substanceIndex": { "rmlId": "100.000.001", "rmlName": substance_name, "rmlCas": "50-00-0", "rmlEc": "200-001-8" } }] } mock_dossier_response = Mock() mock_dossier_response.json.return_value = { "items": [{ "assetExternalId": "abc123", "rootKey": "key123", "lastUpdatedDate": "2024-01-15T10:30:00Z" }] } mock_index_response = Mock() mock_index_response.text = "" mock_get.side_effect = [ mock_substance_response, mock_dossier_response, mock_index_response ] result = search_dossier(substance_name, input_type="rmlName") assert result is not False assert "search_response" in result # Spaces should be encoded assert "%20" in result["search_response"] or "+" in result["search_response"] class TestIntegration: """Integration tests with real API (marked as integration).""" @pytest.mark.integration def test_real_formaldehyde_search(self): """Test real API call for formaldehyde (requires internet).""" result = search_dossier("50-00-0", input_type="rmlCas") if result and isinstance(result, dict): # Real API call succeeded assert "rmlId" in result assert "rmlName" in result assert "rmlCas" in result assert result["rmlCas"] == "50-00-0" assert "index" in result assert "dossierType" in result @pytest.mark.integration def test_real_water_search(self): """Test real API call for water by CAS (requires internet).""" result = search_dossier("7732-18-5", input_type="rmlCas") if result and isinstance(result, dict): assert "rmlCas" in result assert result["rmlCas"] == "7732-18-5" @pytest.mark.integration def test_real_nonexistent_substance(self): """Test real API call for non-existent substance (requires internet).""" result = search_dossier("999-99-9", input_type="rmlCas") # Should return False for non-existent substance assert result is False or isinstance(result, str) @pytest.mark.integration def test_real_glycerin_search(self): """Test real API call for glycerin (requires internet).""" result = search_dossier("56-81-5", input_type="rmlCas") if result and isinstance(result, dict): assert "rmlCas" in result assert result["rmlCas"] == "56-81-5" assert "rmlId" in result assert "dossierType" in result @pytest.mark.integration def test_real_niacinamide_search(self): """Test real API call for niacinamide (requires internet).""" result = search_dossier("98-92-0", input_type="rmlCas") if result and isinstance(result, dict): assert "rmlCas" in result assert result["rmlCas"] == "98-92-0" @pytest.mark.integration def test_real_retinol_search(self): """Test real API call for retinol (requires internet).""" result = search_dossier("68-26-8", input_type="rmlCas") if result and isinstance(result, dict): assert "rmlCas" in result assert result["rmlCas"] == "68-26-8" @pytest.mark.integration def test_real_caffeine_search(self): """Test real API call for caffeine (requires internet).""" result = search_dossier("58-08-2", input_type="rmlCas") if result and isinstance(result, dict): assert "rmlCas" in result assert result["rmlCas"] == "58-08-2" @pytest.mark.integration def test_real_salicylic_acid_search(self): """Test real API call for salicylic acid (requires internet).""" result = search_dossier("69-72-7", input_type="rmlCas") if result and isinstance(result, dict): assert "rmlCas" in result assert result["rmlCas"] == "69-72-7" @pytest.mark.integration def test_real_titanium_dioxide_search(self): """Test real API call for titanium dioxide (requires internet).""" result = search_dossier("13463-67-7", input_type="rmlCas") if result and isinstance(result, dict): assert "rmlCas" in result assert result["rmlCas"] == "13463-67-7" @pytest.mark.integration def test_real_zinc_oxide_search(self): """Test real API call for zinc oxide (requires internet).""" result = search_dossier("1314-13-2", input_type="rmlCas") if result and isinstance(result, dict): assert "rmlCas" in result assert result["rmlCas"] == "1314-13-2" @pytest.mark.integration def test_multiple_cosmetic_ingredients(self, sample_cas_numbers): """Test real API calls for multiple cosmetic ingredients (requires internet).""" # Test a subset of common cosmetic ingredients test_ingredients = [ ("water", "7732-18-5"), ("glycerin", "56-81-5"), ("propylene_glycol", "57-55-6"), ] for name, cas in test_ingredients: result = search_dossier(cas, input_type="rmlCas") if result and isinstance(result, dict): assert result["rmlCas"] == cas assert "rmlId" in result # Give the API some time between requests import time time.sleep(0.5)