diff --git a/src/RealCV.Application/Interfaces/IInternationalCompanyVerifierService.cs b/src/RealCV.Application/Interfaces/IInternationalCompanyVerifierService.cs deleted file mode 100644 index b2e14f4..0000000 --- a/src/RealCV.Application/Interfaces/IInternationalCompanyVerifierService.cs +++ /dev/null @@ -1,49 +0,0 @@ -using RealCV.Application.Models; - -namespace RealCV.Application.Interfaces; - -/// -/// Service for verifying international companies via OpenCorporates -/// -public interface IInternationalCompanyVerifierService -{ - /// - /// Verify an international company - /// - Task VerifyCompanyAsync( - string companyName, - string? jurisdiction = null, - DateOnly? claimedStartDate = null, - DateOnly? claimedEndDate = null); - - /// - /// Search for companies across all jurisdictions - /// - Task> SearchCompaniesAsync( - string query, - string? jurisdiction = null); - - /// - /// Get list of supported jurisdictions - /// - Task> GetJurisdictionsAsync(); -} - -public sealed record OpenCorporatesSearchResult -{ - public required string CompanyName { get; init; } - public required string CompanyNumber { get; init; } - public required string Jurisdiction { get; init; } - public string? JurisdictionCode { get; init; } - public string? Status { get; init; } - public DateOnly? IncorporationDate { get; init; } - public string? OpenCorporatesUrl { get; init; } - public double? MatchScore { get; init; } -} - -public sealed record JurisdictionInfo -{ - public required string Code { get; init; } - public required string Name { get; init; } - public string? Country { get; init; } -} diff --git a/src/RealCV.Application/Models/InternationalCompanyResult.cs b/src/RealCV.Application/Models/InternationalCompanyResult.cs deleted file mode 100644 index 33d232c..0000000 --- a/src/RealCV.Application/Models/InternationalCompanyResult.cs +++ /dev/null @@ -1,30 +0,0 @@ -namespace RealCV.Application.Models; - -/// -/// Result of verifying an international company via OpenCorporates -/// -public sealed record InternationalCompanyResult -{ - public required string ClaimedCompany { get; init; } - public required string ClaimedJurisdiction { get; init; } - public required bool IsVerified { get; init; } - - // Matched company details - public string? MatchedCompanyName { get; init; } - public string? CompanyNumber { get; init; } - public string? Jurisdiction { get; init; } - public string? JurisdictionCode { get; init; } - public string? CompanyType { get; init; } - public string? Status { get; init; } - public DateOnly? IncorporationDate { get; init; } - public DateOnly? DissolutionDate { get; init; } - public string? RegisteredAddress { get; init; } - - // OpenCorporates specific - public string? OpenCorporatesUrl { get; init; } - public DateTime? DataLastUpdated { get; init; } - - public int MatchScore { get; init; } - public string? VerificationNotes { get; init; } - public List Flags { get; init; } = []; -} diff --git a/src/RealCV.Infrastructure/Clients/OpenCorporatesClient.cs b/src/RealCV.Infrastructure/Clients/OpenCorporatesClient.cs deleted file mode 100644 index 61a5504..0000000 --- a/src/RealCV.Infrastructure/Clients/OpenCorporatesClient.cs +++ /dev/null @@ -1,270 +0,0 @@ -using System.Net.Http.Json; -using System.Text.Json; -using System.Text.Json.Serialization; -using Microsoft.Extensions.Logging; -using Microsoft.Extensions.Options; - -namespace RealCV.Infrastructure.Clients; - -public sealed class OpenCorporatesClient -{ - private readonly HttpClient _httpClient; - private readonly ILogger _logger; - private readonly string _apiToken; - - public OpenCorporatesClient( - HttpClient httpClient, - IOptions options, - ILogger logger) - { - _httpClient = httpClient; - _logger = logger; - _apiToken = options.Value.ApiToken; - - _httpClient.BaseAddress = new Uri("https://api.opencorporates.com/v0.4/"); - } - - public async Task SearchCompaniesAsync( - string query, - string? jurisdiction = null, - int perPage = 30, - int page = 1) - { - try - { - var encodedQuery = Uri.EscapeDataString(query); - var url = $"companies/search?q={encodedQuery}&per_page={perPage}&page={page}"; - - if (!string.IsNullOrEmpty(jurisdiction)) - { - url += $"&jurisdiction_code={Uri.EscapeDataString(jurisdiction)}"; - } - - if (!string.IsNullOrEmpty(_apiToken)) - { - url += $"&api_token={_apiToken}"; - } - - _logger.LogDebug("Searching OpenCorporates: {Query}", query); - - var response = await _httpClient.GetAsync(url); - - if (!response.IsSuccessStatusCode) - { - _logger.LogWarning("OpenCorporates API returned {StatusCode} for search: {Query}", - response.StatusCode, query); - return null; - } - - var wrapper = await response.Content.ReadFromJsonAsync>(JsonOptions); - return wrapper?.Results; - } - catch (Exception ex) - { - _logger.LogError(ex, "Error searching OpenCorporates: {Query}", query); - return null; - } - } - - public async Task GetCompanyAsync(string jurisdictionCode, string companyNumber) - { - try - { - var url = $"companies/{Uri.EscapeDataString(jurisdictionCode)}/{Uri.EscapeDataString(companyNumber)}"; - - if (!string.IsNullOrEmpty(_apiToken)) - { - url += $"?api_token={_apiToken}"; - } - - _logger.LogDebug("Getting OpenCorporates company: {Jurisdiction}/{CompanyNumber}", - jurisdictionCode, companyNumber); - - var response = await _httpClient.GetAsync(url); - - if (!response.IsSuccessStatusCode) - { - return null; - } - - var wrapper = await response.Content.ReadFromJsonAsync(JsonOptions); - return wrapper?.Results?.Company; - } - catch (Exception ex) - { - _logger.LogError(ex, "Error getting OpenCorporates company: {Jurisdiction}/{CompanyNumber}", - jurisdictionCode, companyNumber); - return null; - } - } - - public async Task?> GetJurisdictionsAsync() - { - try - { - var url = "jurisdictions"; - - if (!string.IsNullOrEmpty(_apiToken)) - { - url += $"?api_token={_apiToken}"; - } - - var response = await _httpClient.GetAsync(url); - - if (!response.IsSuccessStatusCode) - { - return null; - } - - var wrapper = await response.Content.ReadFromJsonAsync(JsonOptions); - return wrapper?.Results?.Jurisdictions?.Select(j => j.Jurisdiction).Where(j => j != null).ToList()!; - } - catch (Exception ex) - { - _logger.LogError(ex, "Error getting OpenCorporates jurisdictions"); - return null; - } - } - - private static readonly JsonSerializerOptions JsonOptions = new() - { - PropertyNameCaseInsensitive = true, - DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull - }; -} - -public class OpenCorporatesOptions -{ - public string ApiToken { get; set; } = string.Empty; -} - -// Response wrapper -public class OpenCorporatesResponseWrapper -{ - [JsonPropertyName("api_version")] - public string? ApiVersion { get; set; } - - public T? Results { get; set; } -} - -// Search response -public class OpenCorporatesSearchResponse -{ - public List? Companies { get; set; } - public int Page { get; set; } - - [JsonPropertyName("per_page")] - public int PerPage { get; set; } - - [JsonPropertyName("total_pages")] - public int TotalPages { get; set; } - - [JsonPropertyName("total_count")] - public int TotalCount { get; set; } -} - -public class OpenCorporatesCompanyWrapper -{ - public OpenCorporatesCompany? Company { get; set; } - public OpenCorporatesCompanyResults? Results { get; set; } -} - -public class OpenCorporatesCompanyResults -{ - public OpenCorporatesCompany? Company { get; set; } -} - -public class OpenCorporatesCompany -{ - public string? Name { get; set; } - - [JsonPropertyName("company_number")] - public string? CompanyNumber { get; set; } - - [JsonPropertyName("jurisdiction_code")] - public string? JurisdictionCode { get; set; } - - [JsonPropertyName("incorporation_date")] - public string? IncorporationDate { get; set; } - - [JsonPropertyName("dissolution_date")] - public string? DissolutionDate { get; set; } - - [JsonPropertyName("company_type")] - public string? CompanyType { get; set; } - - [JsonPropertyName("registry_url")] - public string? RegistryUrl { get; set; } - - [JsonPropertyName("branch_status")] - public string? BranchStatus { get; set; } - - [JsonPropertyName("current_status")] - public string? CurrentStatus { get; set; } - - [JsonPropertyName("opencorporates_url")] - public string? OpencorporatesUrl { get; set; } - - [JsonPropertyName("registered_address_in_full")] - public string? RegisteredAddressInFull { get; set; } - - [JsonPropertyName("retrieved_at")] - public DateTime? RetrievedAt { get; set; } - - [JsonPropertyName("updated_at")] - public DateTime? UpdatedAt { get; set; } - - public OpenCorporatesAddress? RegisteredAddress { get; set; } - - [JsonPropertyName("industry_codes")] - public List? IndustryCodes { get; set; } -} - -public class OpenCorporatesAddress -{ - [JsonPropertyName("street_address")] - public string? StreetAddress { get; set; } - - public string? Locality { get; set; } - public string? Region { get; set; } - - [JsonPropertyName("postal_code")] - public string? PostalCode { get; set; } - - public string? Country { get; set; } -} - -public class OpenCorporatesIndustryCode -{ - public string? Code { get; set; } - public string? Description { get; set; } - - [JsonPropertyName("code_scheme_id")] - public string? CodeSchemeId { get; set; } -} - -// Jurisdictions -public class OpenCorporatesJurisdictionsWrapper -{ - public OpenCorporatesJurisdictionsList? Results { get; set; } -} - -public class OpenCorporatesJurisdictionsList -{ - public List? Jurisdictions { get; set; } -} - -public class OpenCorporatesJurisdictionWrapper -{ - public OpenCorporatesJurisdiction? Jurisdiction { get; set; } -} - -public class OpenCorporatesJurisdiction -{ - public string? Code { get; set; } - public string? Name { get; set; } - public string? Country { get; set; } - - [JsonPropertyName("full_name")] - public string? FullName { get; set; } -} diff --git a/src/RealCV.Infrastructure/DependencyInjection.cs b/src/RealCV.Infrastructure/DependencyInjection.cs index 9fab7d1..1e109f0 100644 --- a/src/RealCV.Infrastructure/DependencyInjection.cs +++ b/src/RealCV.Infrastructure/DependencyInjection.cs @@ -82,9 +82,6 @@ public static class DependencyInjection services.Configure( configuration.GetSection("GitHub")); - services.Configure( - configuration.GetSection("OpenCorporates")); - // Configure HttpClient for CompaniesHouseClient with retry policy services.AddHttpClient((serviceProvider, client) => { @@ -111,10 +108,6 @@ public static class DependencyInjection services.AddHttpClient() .AddPolicyHandler(GetRetryPolicy()); - // Configure HttpClient for OpenCorporates API - services.AddHttpClient() - .AddPolicyHandler(GetRetryPolicy()); - // Configure HttpClient for ORCID API services.AddHttpClient() .AddPolicyHandler(GetRetryPolicy()); @@ -132,7 +125,6 @@ public static class DependencyInjection // Register additional verification services services.AddScoped(); services.AddScoped(); - services.AddScoped(); services.AddScoped(); // Register file storage - use local storage if configured, otherwise Azure diff --git a/src/RealCV.Infrastructure/Services/InternationalCompanyVerifierService.cs b/src/RealCV.Infrastructure/Services/InternationalCompanyVerifierService.cs deleted file mode 100644 index 954c4b1..0000000 --- a/src/RealCV.Infrastructure/Services/InternationalCompanyVerifierService.cs +++ /dev/null @@ -1,376 +0,0 @@ -using Microsoft.Extensions.Logging; -using RealCV.Application.Interfaces; -using RealCV.Application.Models; -using RealCV.Infrastructure.Clients; - -namespace RealCV.Infrastructure.Services; - -public sealed class InternationalCompanyVerifierService : IInternationalCompanyVerifierService -{ - private readonly OpenCorporatesClient _openCorporatesClient; - private readonly ILogger _logger; - - // Common jurisdiction codes - private static readonly Dictionary CountryToJurisdiction = new(StringComparer.OrdinalIgnoreCase) - { - ["United Kingdom"] = "gb", - ["UK"] = "gb", - ["England"] = "gb", - ["Scotland"] = "gb", - ["Wales"] = "gb", - ["United States"] = "us", - ["USA"] = "us", - ["US"] = "us", - ["Germany"] = "de", - ["France"] = "fr", - ["Netherlands"] = "nl", - ["Ireland"] = "ie", - ["Spain"] = "es", - ["Italy"] = "it", - ["Canada"] = "ca", - ["Australia"] = "au", - ["New Zealand"] = "nz", - ["Singapore"] = "sg", - ["Hong Kong"] = "hk", - ["Japan"] = "jp", - ["India"] = "in", - ["China"] = "cn", - ["Brazil"] = "br", - ["Mexico"] = "mx", - ["Switzerland"] = "ch", - ["Sweden"] = "se", - ["Norway"] = "no", - ["Denmark"] = "dk", - ["Finland"] = "fi", - ["Belgium"] = "be", - ["Austria"] = "at", - ["Poland"] = "pl", - ["Portugal"] = "pt", - ["UAE"] = "ae", - ["South Africa"] = "za", - }; - - public InternationalCompanyVerifierService( - OpenCorporatesClient openCorporatesClient, - ILogger logger) - { - _openCorporatesClient = openCorporatesClient; - _logger = logger; - } - - public async Task VerifyCompanyAsync( - string companyName, - string? jurisdiction = null, - DateOnly? claimedStartDate = null, - DateOnly? claimedEndDate = null) - { - try - { - _logger.LogInformation("Searching OpenCorporates for: {Company} in {Jurisdiction}", - companyName, jurisdiction ?? "all jurisdictions"); - - string? jurisdictionCode = null; - if (!string.IsNullOrEmpty(jurisdiction) && CountryToJurisdiction.TryGetValue(jurisdiction, out var code)) - { - jurisdictionCode = code; - } - else if (!string.IsNullOrEmpty(jurisdiction) && jurisdiction.Length == 2) - { - jurisdictionCode = jurisdiction.ToLowerInvariant(); - } - - var searchResponse = await _openCorporatesClient.SearchCompaniesAsync( - companyName, - jurisdictionCode); - - if (searchResponse?.Companies == null || searchResponse.Companies.Count == 0) - { - return new InternationalCompanyResult - { - ClaimedCompany = companyName, - ClaimedJurisdiction = jurisdiction ?? "Unknown", - IsVerified = false, - VerificationNotes = "No matching companies found in OpenCorporates" - }; - } - - // Find the best match - var bestMatch = FindBestMatch(companyName, searchResponse.Companies); - - if (bestMatch == null) - { - return new InternationalCompanyResult - { - ClaimedCompany = companyName, - ClaimedJurisdiction = jurisdiction ?? "Unknown", - IsVerified = false, - VerificationNotes = $"Found {searchResponse.Companies.Count} results but no close name matches" - }; - } - - // Parse dates - var incorporationDate = ParseDate(bestMatch.IncorporationDate); - var dissolutionDate = ParseDate(bestMatch.DissolutionDate); - - // Calculate match score - var matchScore = CalculateMatchScore(companyName, bestMatch.Name ?? ""); - - // Check for timeline issues - var flags = new List(); - - if (claimedStartDate.HasValue && incorporationDate.HasValue && - claimedStartDate.Value < incorporationDate.Value) - { - flags.Add(new CompanyVerificationFlag - { - Type = "EmploymentBeforeIncorporation", - Severity = "Critical", - Message = $"Claimed start date ({claimedStartDate:yyyy-MM-dd}) is before company incorporation ({incorporationDate:yyyy-MM-dd})", - ScoreImpact = -30 - }); - } - - if (claimedEndDate.HasValue && dissolutionDate.HasValue && - claimedEndDate.Value > dissolutionDate.Value) - { - flags.Add(new CompanyVerificationFlag - { - Type = "EmploymentAfterDissolution", - Severity = "Warning", - Message = $"Claimed end date ({claimedEndDate:yyyy-MM-dd}) is after company dissolution ({dissolutionDate:yyyy-MM-dd})", - ScoreImpact = -20 - }); - } - - return new InternationalCompanyResult - { - ClaimedCompany = companyName, - ClaimedJurisdiction = jurisdiction ?? "Unknown", - IsVerified = true, - MatchedCompanyName = bestMatch.Name, - CompanyNumber = bestMatch.CompanyNumber, - Jurisdiction = GetJurisdictionName(bestMatch.JurisdictionCode), - JurisdictionCode = bestMatch.JurisdictionCode, - CompanyType = bestMatch.CompanyType, - Status = bestMatch.CurrentStatus, - IncorporationDate = incorporationDate, - DissolutionDate = dissolutionDate, - RegisteredAddress = bestMatch.RegisteredAddressInFull, - OpenCorporatesUrl = bestMatch.OpencorporatesUrl, - DataLastUpdated = bestMatch.UpdatedAt, - MatchScore = matchScore, - VerificationNotes = BuildVerificationSummary(bestMatch, searchResponse.TotalCount), - Flags = flags - }; - } - catch (Exception ex) - { - _logger.LogError(ex, "Error searching OpenCorporates for: {Company}", companyName); - return new InternationalCompanyResult - { - ClaimedCompany = companyName, - ClaimedJurisdiction = jurisdiction ?? "Unknown", - IsVerified = false, - VerificationNotes = $"Error during search: {ex.Message}" - }; - } - } - - public async Task> SearchCompaniesAsync( - string query, - string? jurisdiction = null) - { - try - { - string? jurisdictionCode = null; - if (!string.IsNullOrEmpty(jurisdiction) && CountryToJurisdiction.TryGetValue(jurisdiction, out var code)) - { - jurisdictionCode = code; - } - - var searchResponse = await _openCorporatesClient.SearchCompaniesAsync( - query, - jurisdictionCode); - - if (searchResponse?.Companies == null) - { - return []; - } - - return searchResponse.Companies - .Where(c => c.Company != null) - .Select(c => new OpenCorporatesSearchResult - { - CompanyName = c.Company!.Name ?? "Unknown", - CompanyNumber = c.Company.CompanyNumber ?? "Unknown", - Jurisdiction = GetJurisdictionName(c.Company.JurisdictionCode), - JurisdictionCode = c.Company.JurisdictionCode, - Status = c.Company.CurrentStatus, - IncorporationDate = ParseDate(c.Company.IncorporationDate), - OpenCorporatesUrl = c.Company.OpencorporatesUrl, - MatchScore = CalculateMatchScore(query, c.Company.Name ?? "") - }) - .ToList(); - } - catch (Exception ex) - { - _logger.LogError(ex, "Error searching OpenCorporates for: {Query}", query); - return []; - } - } - - public async Task> GetJurisdictionsAsync() - { - try - { - var jurisdictions = await _openCorporatesClient.GetJurisdictionsAsync(); - - if (jurisdictions == null) - { - return []; - } - - return jurisdictions - .Select(j => new JurisdictionInfo - { - Code = j.Code ?? "Unknown", - Name = j.FullName ?? j.Name ?? "Unknown", - Country = j.Country - }) - .ToList(); - } - catch (Exception ex) - { - _logger.LogError(ex, "Error getting OpenCorporates jurisdictions"); - return []; - } - } - - private static OpenCorporatesCompany? FindBestMatch( - string searchName, - List companies) - { - var normalizedSearch = NormalizeName(searchName); - - // First try exact match - var exactMatch = companies - .Select(c => c.Company) - .Where(c => c != null) - .FirstOrDefault(c => NormalizeName(c!.Name ?? "").Equals(normalizedSearch, StringComparison.OrdinalIgnoreCase)); - - if (exactMatch != null) - return exactMatch; - - // Then try contains match, preferring active companies - var containsMatches = companies - .Select(c => c.Company) - .Where(c => c != null && !string.IsNullOrEmpty(c.Name)) - .Where(c => NormalizeName(c!.Name!).Contains(normalizedSearch, StringComparison.OrdinalIgnoreCase) || - normalizedSearch.Contains(NormalizeName(c!.Name!), StringComparison.OrdinalIgnoreCase)) - .OrderBy(c => c!.CurrentStatus?.ToLowerInvariant() == "active" ? 0 : 1) - .ThenBy(c => c!.Name!.Length) - .ToList(); - - return containsMatches.FirstOrDefault(); - } - - private static string NormalizeName(string name) - { - return name - .Replace("LIMITED", "", StringComparison.OrdinalIgnoreCase) - .Replace("LTD", "", StringComparison.OrdinalIgnoreCase) - .Replace("PLC", "", StringComparison.OrdinalIgnoreCase) - .Replace("INC", "", StringComparison.OrdinalIgnoreCase) - .Replace("CORP", "", StringComparison.OrdinalIgnoreCase) - .Replace("LLC", "", StringComparison.OrdinalIgnoreCase) - .Replace("GMBH", "", StringComparison.OrdinalIgnoreCase) - .Replace("AG", "", StringComparison.OrdinalIgnoreCase) - .Replace(".", "") - .Replace(",", "") - .Trim(); - } - - private static string GetJurisdictionName(string? code) - { - if (string.IsNullOrEmpty(code)) - return "Unknown"; - - return code.ToUpperInvariant(); - } - - private static DateOnly? ParseDate(string? dateString) - { - if (string.IsNullOrEmpty(dateString)) - return null; - - if (DateOnly.TryParse(dateString, out var date)) - return date; - - return null; - } - - private static int CalculateMatchScore(string searchName, string foundName) - { - var normalizedSearch = NormalizeName(searchName).ToLowerInvariant(); - var normalizedFound = NormalizeName(foundName).ToLowerInvariant(); - - if (normalizedSearch == normalizedFound) - return 100; - - if (normalizedFound.Contains(normalizedSearch) || normalizedSearch.Contains(normalizedFound)) - return 80; - - // Calculate Levenshtein similarity - var distance = LevenshteinDistance(normalizedSearch, normalizedFound); - var maxLength = Math.Max(normalizedSearch.Length, normalizedFound.Length); - var similarity = (int)((1 - (double)distance / maxLength) * 100); - - return Math.Max(0, similarity); - } - - private static int LevenshteinDistance(string s1, string s2) - { - var n = s1.Length; - var m = s2.Length; - var d = new int[n + 1, m + 1]; - - for (var i = 0; i <= n; i++) - d[i, 0] = i; - - for (var j = 0; j <= m; j++) - d[0, j] = j; - - for (var i = 1; i <= n; i++) - { - for (var j = 1; j <= m; j++) - { - var cost = s1[i - 1] == s2[j - 1] ? 0 : 1; - d[i, j] = Math.Min(Math.Min(d[i - 1, j] + 1, d[i, j - 1] + 1), d[i - 1, j - 1] + cost); - } - } - - return d[n, m]; - } - - private static string BuildVerificationSummary(OpenCorporatesCompany company, int totalResults) - { - var parts = new List(); - - if (!string.IsNullOrEmpty(company.CurrentStatus)) - parts.Add($"Status: {company.CurrentStatus}"); - - if (!string.IsNullOrEmpty(company.JurisdictionCode)) - parts.Add($"Jurisdiction: {company.JurisdictionCode.ToUpperInvariant()}"); - - if (!string.IsNullOrEmpty(company.IncorporationDate)) - parts.Add($"Incorporated: {company.IncorporationDate}"); - - if (!string.IsNullOrEmpty(company.CompanyType)) - parts.Add($"Type: {company.CompanyType}"); - - if (totalResults > 1) - parts.Add($"({totalResults} total matches found)"); - - return string.Join(" | ", parts); - } -} diff --git a/src/RealCV.Web/appsettings.json b/src/RealCV.Web/appsettings.json index f01272f..272bc79 100644 --- a/src/RealCV.Web/appsettings.json +++ b/src/RealCV.Web/appsettings.json @@ -25,9 +25,6 @@ "GitHub": { "PersonalAccessToken": "" }, - "OpenCorporates": { - "ApiToken": "" - }, "Serilog": { "MinimumLevel": { "Default": "Information",