Compare commits
9 Commits
94ca6e1b9a
...
develop
| Author | SHA1 | Date | |
|---|---|---|---|
| 135e774f71 | |||
| 45812420f5 | |||
| 883d9afa2d | |||
| 983fb5bd67 | |||
| 232036746f | |||
| 2a96a4bfaf | |||
| 4b87af80a8 | |||
| 9cb8c35616 | |||
| 3d666d5f9c |
@@ -8,9 +8,14 @@ public interface ICompanyNameMatcherService
|
||||
/// Uses AI to semantically compare a company name from a CV against Companies House candidates.
|
||||
/// Returns the best match with confidence score and reasoning.
|
||||
/// </summary>
|
||||
/// <param name="cvCompanyName">The company name as written on the CV</param>
|
||||
/// <param name="candidates">List of potential matches from Companies House</param>
|
||||
/// <param name="industryHint">Optional industry context for well-known brands (e.g., "pharmacy/healthcare retail")</param>
|
||||
/// <param name="cancellationToken">Cancellation token</param>
|
||||
Task<SemanticMatchResult?> FindBestMatchAsync(
|
||||
string cvCompanyName,
|
||||
List<CompanyCandidate> candidates,
|
||||
string? industryHint = null,
|
||||
CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -92,6 +92,19 @@ public sealed class ProcessCVCheckJob
|
||||
"Parsed CV for check {CheckId}: {EmploymentCount} employment entries",
|
||||
cvCheckId, cvData.Employment.Count);
|
||||
|
||||
// Validate that the CV contains meaningful data
|
||||
// A CV with no name, no employment AND no education is likely a parsing failure
|
||||
if (cvData.Employment.Count == 0 && cvData.Education.Count == 0 &&
|
||||
(string.IsNullOrWhiteSpace(cvData.FullName) || cvData.FullName == "Unknown"))
|
||||
{
|
||||
_logger.LogWarning(
|
||||
"CV check {CheckId} parsed with no extractable data - possible scanned/image PDF or parsing failure",
|
||||
cvCheckId);
|
||||
throw new InvalidOperationException(
|
||||
"Could not extract any employment or education data from this CV. " +
|
||||
"The file may be a scanned image, password-protected, or in an unsupported format.");
|
||||
}
|
||||
|
||||
// Step 4: Save extracted data
|
||||
cvCheck.ExtractedDataJson = JsonSerializer.Serialize(cvData, JsonDefaults.CamelCaseIndented);
|
||||
cvCheck.ProcessingStage = "Verifying Employment";
|
||||
@@ -279,6 +292,8 @@ public sealed class ProcessCVCheckJob
|
||||
try
|
||||
{
|
||||
cvCheck.Status = CheckStatus.Failed;
|
||||
// Store a user-friendly error message
|
||||
cvCheck.ProcessingStage = GetUserFriendlyErrorMessage(ex);
|
||||
// Use CancellationToken.None to ensure failure status is saved even if original token is cancelled
|
||||
await _dbContext.SaveChangesAsync(CancellationToken.None);
|
||||
}
|
||||
@@ -1411,4 +1426,39 @@ public sealed class ProcessCVCheckJob
|
||||
obj.FlagType?.ToUpperInvariant() ?? "");
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Translates a processing exception into a message suitable for showing to the end user.
/// The decision is based purely on case-insensitive fragments of <see cref="Exception.Message"/>;
/// rules are evaluated top to bottom and the first one that matches wins.
/// </summary>
/// <param name="ex">The exception raised while processing the CV.</param>
/// <returns>A fixed, user-facing sentence; a generic fallback when no rule matches.</returns>
private static string GetUserFriendlyErrorMessage(Exception ex)
{
    var details = ex.Message;

    // Case-insensitive "does the exception text mention this fragment?" helper.
    bool Mentions(string fragment) => details.Contains(fragment, StringComparison.OrdinalIgnoreCase);

    // Nothing usable was parsed out of the document.
    // Checked first so it takes precedence over the "password" rule below.
    var nothingExtracted = Mentions("no extractable data")
        || Mentions("Could not extract any employment");
    if (nothingExtracted)
    {
        return "No useful data could be extracted from this CV. The file may be a scanned image or in an unsupported format.";
    }

    // Upstream service throttling.
    var throttled = Mentions("API usage limits") || Mentions("rate limit");
    if (throttled)
    {
        return "Service temporarily unavailable. Please try again in a few minutes.";
    }

    // Text extraction from the uploaded file failed.
    if (Mentions("Could not extract text"))
    {
        return "Could not read the CV file. Please ensure it's a valid PDF or DOCX document.";
    }

    // Protected documents.
    var locked = Mentions("password") || Mentions("encrypted");
    if (locked)
    {
        return "This CV appears to be password-protected. Please upload an unprotected version.";
    }

    // No rule matched: generic fallback.
    return "An error occurred while processing your CV. Please try uploading again.";
}
|
||||
}
|
||||
|
||||
@@ -33,29 +33,43 @@ public sealed class AICompanyNameMatcherService : ICompanyNameMatcherService
|
||||
Compare the company name from a CV against official Companies House records.
|
||||
|
||||
CV Company Name: "{CV_COMPANY}"
|
||||
|
||||
{INDUSTRY_CONTEXT}
|
||||
Companies House Candidates:
|
||||
{CANDIDATES}
|
||||
|
||||
Determine which candidate (if any) is the SAME company as the CV entry.
|
||||
|
||||
Matching Guidelines:
|
||||
1. MATCH if the CV name is clearly the same organisation as a candidate:
|
||||
- "Royal Bank of Scotland" → "THE ROYAL BANK OF SCOTLAND PUBLIC LIMITED COMPANY" ✓ (same bank)
|
||||
- "Yorkshire Electricity" → "YORKSHIRE ELECTRICITY GROUP PLC" ✓ (same utility)
|
||||
1. MATCH if the CV name is the same organisation as a candidate (even if registered name differs):
|
||||
- "Boots" → "BOOTS UK LIMITED" ✓ (trading name = registered company)
|
||||
- "Boots" → "THE BOOTS COMPANY PLC" ✓ (trading name = parent company)
|
||||
- "Tesco" → "TESCO PLC" ✓ (trading name = registered name)
|
||||
- "ASDA" → "ASDA STORES LIMITED" ✓ (brand = operating company)
|
||||
- "Legal & General" → "LEGAL & GENERAL GROUP PLC" ✓ (brand = holding company)
|
||||
- "Checkout.com" → "CHECKOUT.COM PAYMENTS LIMITED" ✓ (exact match)
|
||||
- "EY UK" → "ERNST & YOUNG LLP" ✓ (trading name = partnership)
|
||||
- "Royal Bank of Scotland" → "THE ROYAL BANK OF SCOTLAND PUBLIC LIMITED COMPANY" ✓
|
||||
|
||||
2. DO NOT MATCH if the words are fundamentally different:
|
||||
- "Families First" ≠ "FAMILIES AGAINST CONFORMITY" (different words after "Families")
|
||||
- "Royal Bank" ≠ "Royal Academy" (Bank ≠ Academy)
|
||||
- "Storm Ideas" ≠ "STORM LIMITED" (missing "Ideas" - could be different company)
|
||||
2. DO NOT MATCH if the candidate adds significant DIFFERENT words that indicate a different business:
|
||||
- "Boots" ≠ "BOOTS AND BEARDS" ✗ (pharmacy chain is NOT a barber/grooming business)
|
||||
- "Legal & General" ≠ "LEGAL LIMITED" ✗ (major insurer is NOT a generic "legal" company)
|
||||
- "Checkout.com" ≠ "XN CHECKOUT LIMITED" ✗ (fintech is NOT an unrelated checkout company)
|
||||
- "EY UK" ≠ "EY UK GDPR REPRESENTATIVE LIMITED" ✗ (main employer, not a subsidiary)
|
||||
|
||||
3. Legal suffixes (Ltd, Limited, PLC, LLP, CiC) should be ignored when comparing names
|
||||
3. KEY DISTINCTION - Geographic/legal suffixes are OK, but new business words are NOT:
|
||||
- "Boots" → "BOOTS UK LIMITED" ✓ (UK is just geographic qualifier)
|
||||
- "Boots" → "BOOTS AND BEARDS" ✗ (BEARDS indicates different business)
|
||||
- "Meridian Holdings" → "MERIDIAN (THE ORIGINAL) LIMITED" ✗ ("THE ORIGINAL" suggests different business)
|
||||
- "Paramount Consulting UK" → "PARAMOUNT LIMITED" ✗ (missing "Consulting" - different type)
|
||||
- "Apex Technology Partners" → "APEX LIMITED" ✗ (missing "Technology Partners")
|
||||
|
||||
4. Adding "THE" or "GROUP" to a name doesn't make it a different company
|
||||
4. Legal suffixes (Ltd, Limited, PLC, LLP, CiC) should be ignored when comparing names
|
||||
|
||||
5. If unsure, prefer matching over rejecting when core identifying words match
|
||||
5. Adding "THE", "GROUP", "UK", or "HOLDINGS" to a name doesn't make it a different company
|
||||
|
||||
6. When the CV mentions a well-known brand, prefer the main operating/holding company over obscure matches
|
||||
|
||||
7. If INDUSTRY CONTEXT is provided, use it to reject candidates clearly in different industries
|
||||
|
||||
CRITICAL: Return the COMPLETE company number exactly as shown (e.g., "SC083026", "02366995").
|
||||
Do NOT truncate or abbreviate the company number.
|
||||
@@ -80,6 +94,7 @@ public sealed class AICompanyNameMatcherService : ICompanyNameMatcherService
|
||||
public async Task<SemanticMatchResult?> FindBestMatchAsync(
|
||||
string cvCompanyName,
|
||||
List<CompanyCandidate> candidates,
|
||||
string? industryHint = null,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(cvCompanyName) || candidates.Count == 0)
|
||||
@@ -87,8 +102,8 @@ public sealed class AICompanyNameMatcherService : ICompanyNameMatcherService
|
||||
return null;
|
||||
}
|
||||
|
||||
_logger.LogDebug("Using AI to match '{CVCompany}' against {Count} candidates",
|
||||
cvCompanyName, candidates.Count);
|
||||
_logger.LogDebug("Using AI to match '{CVCompany}' against {Count} candidates (industry: {Industry})",
|
||||
cvCompanyName, candidates.Count, industryHint ?? "unknown");
|
||||
|
||||
try
|
||||
{
|
||||
@@ -96,8 +111,14 @@ public sealed class AICompanyNameMatcherService : ICompanyNameMatcherService
|
||||
var candidatesText = string.Join("\n", candidates.Select((c, i) =>
|
||||
$"[{c.CompanyNumber}] {c.CompanyName} (Status: {c.CompanyStatus ?? "Unknown"})"));
|
||||
|
||||
// Add industry context if available
|
||||
var industryContext = string.IsNullOrEmpty(industryHint)
|
||||
? ""
|
||||
: $"Industry Context: This is a well-known brand in {industryHint}. Reject candidates clearly in different industries.\n";
|
||||
|
||||
var prompt = MatchingPrompt
|
||||
.Replace("{CV_COMPANY}", cvCompanyName)
|
||||
.Replace("{INDUSTRY_CONTEXT}", industryContext)
|
||||
.Replace("{CANDIDATES}", candidatesText);
|
||||
|
||||
var messages = new List<Message>
|
||||
@@ -107,8 +128,8 @@ public sealed class AICompanyNameMatcherService : ICompanyNameMatcherService
|
||||
|
||||
var parameters = new MessageParameters
|
||||
{
|
||||
Model = "claude-sonnet-4-20250514",
|
||||
MaxTokens = 1024,
|
||||
Model = "claude-3-5-haiku-20241022",
|
||||
MaxTokens = 512,
|
||||
Messages = messages,
|
||||
System = [new SystemMessage(SystemPrompt)]
|
||||
};
|
||||
@@ -208,106 +229,359 @@ public sealed class AICompanyNameMatcherService : ICompanyNameMatcherService
|
||||
}
|
||||
}
|
||||
|
||||
private const string CompoundNamePrompt = """
|
||||
Analyze this company name from a CV and determine if it refers to ONE company or MULTIPLE companies.
|
||||
/// <summary>
|
||||
/// Well-known company names that contain "&" or "and" but are SINGLE companies.
|
||||
/// These should NOT be split into multiple parts.
|
||||
/// </summary>
|
||||
private static readonly HashSet<string> KnownSingleCompanyNames = new(StringComparer.OrdinalIgnoreCase)
|
||||
{
|
||||
// Big 4 / Professional Services
|
||||
"Ernst & Young", "Ernst and Young", "EY",
|
||||
"Deloitte and Touche", "Deloitte & Touche",
|
||||
"PricewaterhouseCoopers", "Price Waterhouse",
|
||||
"KPMG",
|
||||
"Accenture",
|
||||
|
||||
Company name: "{COMPANY_NAME}"
|
||||
// Retail
|
||||
"Marks & Spencer", "Marks and Spencer", "M&S",
|
||||
"Fortnum & Mason", "Fortnum and Mason",
|
||||
"Crabtree & Evelyn",
|
||||
"Holland & Barrett", "Holland and Barrett",
|
||||
"Past Times & Present",
|
||||
"Barnes & Noble",
|
||||
"Abercrombie & Fitch",
|
||||
"Dolce & Gabbana",
|
||||
"Bang & Olufsen",
|
||||
"Crate & Barrel",
|
||||
"Bed Bath & Beyond",
|
||||
"Bath & Body Works",
|
||||
|
||||
Examples:
|
||||
- "Ernst & Young" → ONE company (it's the full name of the accounting firm)
|
||||
- "Marks & Spencer" → ONE company (it's the full name of the retailer)
|
||||
- "ASDA/WALMART" → TWO companies: ["ASDA", "WALMART"] (person worked at both or it's showing ownership)
|
||||
- "Corus & Laura Ashley Hotels" → TWO companies: ["Corus", "Laura Ashley Hotels"] (different industries)
|
||||
- "PwC" → ONE company
|
||||
- "Deloitte and Touche" → ONE company (historical name of Deloitte)
|
||||
- "BMW Group Ireland" → ONE company
|
||||
- "Tesco Stores and Distribution" → ONE company (departments of same company)
|
||||
// Consumer Goods
|
||||
"Procter & Gamble", "Procter and Gamble", "P&G",
|
||||
"Johnson & Johnson", "Johnson and Johnson", "J&J",
|
||||
"Reckitt & Colman", "Reckitt and Colman",
|
||||
"Colgate-Palmolive",
|
||||
"Unilever",
|
||||
"Henkel",
|
||||
|
||||
Rules:
|
||||
1. Well-known company names with "&" or "and" are SINGLE companies (Ernst & Young, Marks & Spencer, Procter & Gamble)
|
||||
2. A "/" usually indicates multiple companies or ownership relationship
|
||||
3. If the parts are in completely different industries, they're likely separate companies
|
||||
4. If one part is clearly a subsidiary/department of the other, treat as ONE company
|
||||
// Food & Beverage
|
||||
"Prêt A Manger", "Pret A Manger",
|
||||
"Fortnum and Mason",
|
||||
"Lyle & Scott",
|
||||
"Ben & Jerry's", "Ben and Jerry's",
|
||||
"Baskin & Robbins",
|
||||
"Haribo",
|
||||
|
||||
Respond with ONLY valid JSON:
|
||||
{
|
||||
"isSingleCompany": boolean,
|
||||
"companies": ["company1", "company2"] or ["single company name"],
|
||||
"reasoning": "brief explanation"
|
||||
}
|
||||
""";
|
||||
// Finance & Insurance
|
||||
"Standard & Poor's", "Standard and Poor's", "S&P",
|
||||
"Moody's",
|
||||
"Fitch Ratings",
|
||||
"Lloyd's of London",
|
||||
"Coutts & Co", "Coutts and Co",
|
||||
"Brown Shipley & Co",
|
||||
"Schroders",
|
||||
|
||||
public async Task<List<string>?> ExtractCompanyNamesAsync(
|
||||
// Law Firms (common patterns)
|
||||
"Allen & Overy", "Allen and Overy",
|
||||
"Clifford Chance",
|
||||
"Freshfields Bruckhaus Deringer",
|
||||
"Linklaters",
|
||||
"Slaughter and May", "Slaughter & May",
|
||||
"Herbert Smith Freehills",
|
||||
"Hogan Lovells",
|
||||
"Norton Rose Fulbright",
|
||||
"DLA Piper",
|
||||
"Baker & McKenzie", "Baker McKenzie",
|
||||
"Eversheds Sutherland",
|
||||
"Ashurst",
|
||||
"CMS",
|
||||
"Simmons & Simmons",
|
||||
"Travers Smith",
|
||||
"Macfarlanes",
|
||||
"Addleshaw Goddard",
|
||||
"Pinsent Masons",
|
||||
"Shoosmiths",
|
||||
"Irwin Mitchell",
|
||||
"DAC Beachcroft",
|
||||
"Weightmans",
|
||||
"Browne Jacobson",
|
||||
"Mills & Reeve", "Mills and Reeve",
|
||||
"Taylor Wessing",
|
||||
"Osborne Clarke",
|
||||
"Bird & Bird", "Bird and Bird",
|
||||
"Withers",
|
||||
"Charles Russell Speechlys",
|
||||
"Stephenson Harwood",
|
||||
"Watson Farley & Williams",
|
||||
"Clyde & Co", "Clyde and Co",
|
||||
"Reed Smith",
|
||||
"Kennedys",
|
||||
"Fieldfisher",
|
||||
"RPC",
|
||||
"Womble Bond Dickinson",
|
||||
"Burges Salmon",
|
||||
"Trowers & Hamlins", "Trowers and Hamlins",
|
||||
"Bevan Brittan",
|
||||
"Veale Wasbrough Vizards",
|
||||
|
||||
// Media & Entertainment
|
||||
"Simon & Schuster",
|
||||
"Warner Bros", "Warner Brothers",
|
||||
"William Morris Endeavor",
|
||||
"Creative Artists Agency",
|
||||
|
||||
// Automotive
|
||||
"Rolls-Royce",
|
||||
"Aston Martin",
|
||||
"Jaguar Land Rover",
|
||||
|
||||
// Pharmaceuticals
|
||||
"GlaxoSmithKline", "GSK",
|
||||
"AstraZeneca",
|
||||
"Smith & Nephew",
|
||||
"Roche",
|
||||
|
||||
// Engineering & Construction
|
||||
"Mott MacDonald",
|
||||
"Arup",
|
||||
"Laing O'Rourke",
|
||||
"Kier",
|
||||
"Balfour Beatty",
|
||||
"Taylor Wimpey",
|
||||
"Persimmon",
|
||||
"Bellway",
|
||||
"Berkeley",
|
||||
|
||||
// Technology
|
||||
"Hewlett-Packard", "HP",
|
||||
"Texas Instruments",
|
||||
"AT&T",
|
||||
"T-Mobile",
|
||||
|
||||
// Other
|
||||
"Young & Co", "Young and Co",
|
||||
"Smith & Williamson",
|
||||
"Grant Thornton",
|
||||
"BDO",
|
||||
"RSM",
|
||||
"Mazars",
|
||||
"Moore Kingston Smith",
|
||||
"Crowe",
|
||||
"PKF",
|
||||
"Saffery Champness",
|
||||
"Buzzacott",
|
||||
"HW Fisher",
|
||||
"Haysmacintyre",
|
||||
"Menzies",
|
||||
"MHA",
|
||||
"Azets",
|
||||
"Dains",
|
||||
"Streets",
|
||||
"Armstrong Watson",
|
||||
|
||||
// Common department/division patterns (not to be split)
|
||||
"Sales and Marketing",
|
||||
"Research and Development", "R&D",
|
||||
"Human Resources",
|
||||
"Finance and Operations",
|
||||
"Legal and Compliance",
|
||||
"IT and Digital",
|
||||
"Supply Chain and Logistics",
|
||||
};
|
||||
|
||||
/// <summary>
/// Lower-case fragments which, when found inside a name, are taken to mean the name describes
/// divisions or departments of ONE company rather than two separate companies.
/// Every fragment is wrapped in spaces, so it can only match in the middle of a name.
/// </summary>
/// <remarks>
/// NOTE(review): the legal-suffix fragments (" plc and ", " ltd and ", " limited and ") also match
/// names such as "Acme Ltd and Beta Ltd", which look like two distinct legal entities - confirm
/// that treating those as a single company is intended.
/// </remarks>
private static readonly string[] SingleCompanyPatterns =
{
    // Business-unit wording, e.g. "Tesco Stores and Distribution", "Next Retail and Online"
    " stores and ",
    " retail and ",

    // Geographic wording, e.g. "BMW UK and Ireland", "Google Europe and Middle East"
    " uk and ",
    " europe and ",

    // Organisational-unit wording
    " division and ",
    " department and ",
    " services and ",
    " group and ",

    // Legal-suffix wording
    " plc and ",
    " ltd and ",
    " limited and ",
};
|
||||
|
||||
/// <summary>
|
||||
/// Determines if a company name refers to multiple companies and extracts them.
|
||||
/// Uses rule-based detection instead of AI for better performance and cost savings.
|
||||
/// </summary>
|
||||
public Task<List<string>?> ExtractCompanyNamesAsync(
|
||||
string companyName,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(companyName))
|
||||
{
|
||||
return null;
|
||||
return Task.FromResult<List<string>?>(null);
|
||||
}
|
||||
|
||||
_logger.LogDebug("Using AI to check if '{CompanyName}' is a compound name", companyName);
|
||||
_logger.LogDebug("Checking if '{CompanyName}' is a compound name (rule-based)", companyName);
|
||||
|
||||
try
|
||||
var result = DetectCompoundName(companyName);
|
||||
|
||||
if (result is null)
|
||||
{
|
||||
var prompt = CompoundNamePrompt.Replace("{COMPANY_NAME}", companyName);
|
||||
|
||||
var messages = new List<Message>
|
||||
{
|
||||
new(RoleType.User, prompt)
|
||||
};
|
||||
|
||||
var parameters = new MessageParameters
|
||||
{
|
||||
Model = "claude-3-5-haiku-20241022",
|
||||
MaxTokens = 256,
|
||||
Messages = messages,
|
||||
System = [new SystemMessage("You are a company name parser. Respond only with valid JSON.")]
|
||||
};
|
||||
|
||||
var response = await _anthropicClient.Messages.GetClaudeMessageAsync(parameters, cancellationToken);
|
||||
|
||||
var responseText = response.Content
|
||||
.OfType<TextContent>()
|
||||
.FirstOrDefault()?.Text;
|
||||
|
||||
if (string.IsNullOrWhiteSpace(responseText))
|
||||
{
|
||||
_logger.LogWarning("AI returned empty response for compound name check");
|
||||
return null;
|
||||
}
|
||||
|
||||
responseText = JsonResponseHelper.CleanJsonResponse(responseText);
|
||||
|
||||
var result = JsonSerializer.Deserialize<CompoundNameResponse>(responseText, JsonDefaults.CamelCase);
|
||||
|
||||
if (result is null)
|
||||
{
|
||||
_logger.LogWarning("Failed to deserialize compound name response: {Response}", responseText);
|
||||
return null;
|
||||
}
|
||||
|
||||
_logger.LogDebug("AI compound name result: IsSingle={IsSingle}, Companies=[{Companies}], Reasoning={Reasoning}",
|
||||
result.IsSingleCompany, string.Join(", ", result.Companies ?? []), result.Reasoning);
|
||||
|
||||
if (result.IsSingleCompany || result.Companies is null || result.Companies.Count < 2)
|
||||
{
|
||||
return null; // Single company, no splitting needed
|
||||
}
|
||||
|
||||
return result.Companies;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "AI compound name detection failed for '{CompanyName}'", companyName);
|
||||
return null;
|
||||
_logger.LogDebug("'{CompanyName}' is a single company", companyName);
|
||||
return Task.FromResult<List<string>?>(null);
|
||||
}
|
||||
|
||||
_logger.LogDebug("'{CompanyName}' detected as compound, parts: [{Parts}]",
|
||||
companyName, string.Join(", ", result));
|
||||
|
||||
return Task.FromResult<List<string>?>(result);
|
||||
}
|
||||
|
||||
private sealed class CompoundNameResponse
|
||||
/// <summary>
|
||||
/// Rule-based detection of compound company names.
|
||||
/// Returns null if single company, or list of parts if multiple companies.
|
||||
/// </summary>
|
||||
private List<string>? DetectCompoundName(string name)
|
||||
{
|
||||
public bool IsSingleCompany { get; set; }
|
||||
public List<string>? Companies { get; set; }
|
||||
public string? Reasoning { get; set; }
|
||||
var trimmedName = name.Trim();
|
||||
|
||||
// Check 1: Is this a known single company name?
|
||||
if (IsKnownSingleCompany(trimmedName))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
// Check 2: Does it match single-company patterns (departments/divisions)?
|
||||
if (MatchesSingleCompanyPattern(trimmedName))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
// Check 3: "/" is a strong indicator of multiple companies
|
||||
if (trimmedName.Contains('/'))
|
||||
{
|
||||
var slashParts = trimmedName
|
||||
.Split('/')
|
||||
.Select(p => p.Trim())
|
||||
.Where(p => p.Length >= 2)
|
||||
.ToList();
|
||||
|
||||
if (slashParts.Count >= 2)
|
||||
{
|
||||
return slashParts;
|
||||
}
|
||||
}
|
||||
|
||||
// Check 4: " & " or " and " between what look like separate company names
|
||||
// Only split if both parts look like distinct company names
|
||||
var andMatch = System.Text.RegularExpressions.Regex.Match(
|
||||
trimmedName,
|
||||
@"^(.+?)\s+(?:&|and)\s+(.+)$",
|
||||
System.Text.RegularExpressions.RegexOptions.IgnoreCase);
|
||||
|
||||
if (andMatch.Success)
|
||||
{
|
||||
var part1 = andMatch.Groups[1].Value.Trim();
|
||||
var part2 = andMatch.Groups[2].Value.Trim();
|
||||
|
||||
// If the combined name is a known single company, don't split
|
||||
if (IsKnownSingleCompany(trimmedName))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
// If either part is very short (like initials), probably not a split
|
||||
if (part1.Length < 3 || part2.Length < 3)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
// If part2 looks like a department/role descriptor, don't split
|
||||
if (IsDepartmentOrRole(part2))
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
// If both parts look like independent company names, this is likely compound
|
||||
if (LooksLikeCompanyName(part1) && LooksLikeCompanyName(part2))
|
||||
{
|
||||
return [part1, part2];
|
||||
}
|
||||
}
|
||||
|
||||
// Default: treat as single company
|
||||
return null;
|
||||
}
|
||||
|
||||
/// <summary>
/// Determines whether <paramref name="name"/> is, or contains as a whole word/phrase, one of the
/// entries in <c>KnownSingleCompanyNames</c>.
/// </summary>
/// <remarks>
/// Matching is case-insensitive. An entry only counts when it is delimited by the start/end of the
/// string or by a non-alphanumeric character on both sides. A plain substring test is not enough:
/// short entries such as "EY", "HP" or "CMS" would otherwise match unrelated names like "Disney"
/// or "Shepherd" and prevent genuine compound names from ever being split.
/// </remarks>
/// <param name="name">The (already trimmed) company name taken from the CV.</param>
/// <returns><c>true</c> when a known single-company name is present; otherwise <c>false</c>.</returns>
private static bool IsKnownSingleCompany(string name)
{
    if (string.IsNullOrWhiteSpace(name))
    {
        return false;
    }

    // Fast path: the whole name is itself a known entry (the set's comparer decides equality).
    if (KnownSingleCompanyNames.Contains(name))
    {
        return true;
    }

    // Slow path: the name embeds a known entry, e.g. "Ernst & Young LLP" or "EY UK".
    foreach (var known in KnownSingleCompanyNames)
    {
        if (ContainsWholePhrase(name, known))
        {
            return true;
        }
    }

    return false;

    // True when 'phrase' occurs in 'haystack' with a word boundary on both sides.
    static bool ContainsWholePhrase(string haystack, string phrase)
    {
        if (phrase.Length == 0)
        {
            return false;
        }

        var searchFrom = 0;
        while (searchFrom <= haystack.Length - phrase.Length)
        {
            var start = haystack.IndexOf(phrase, searchFrom, StringComparison.OrdinalIgnoreCase);
            if (start < 0)
            {
                return false;
            }

            var end = start + phrase.Length;
            var boundaryBefore = start == 0 || !char.IsLetterOrDigit(haystack[start - 1]);
            var boundaryAfter = end == haystack.Length || !char.IsLetterOrDigit(haystack[end]);
            if (boundaryBefore && boundaryAfter)
            {
                return true;
            }

            // This occurrence was embedded in a longer word; keep looking further along.
            searchFrom = start + 1;
        }

        return false;
    }
}
|
||||
|
||||
/// <summary>
/// Reports whether the name contains any of the <c>SingleCompanyPatterns</c> fragments.
/// The name is lower-cased with the invariant culture first, then each fragment is looked up
/// with an ordinal substring search.
/// </summary>
/// <param name="name">The company name to inspect.</param>
/// <returns><c>true</c> as soon as one fragment is found; otherwise <c>false</c>.</returns>
private static bool MatchesSingleCompanyPattern(string name)
{
    var normalised = name.ToLowerInvariant();

    foreach (var fragment in SingleCompanyPatterns)
    {
        if (normalised.Contains(fragment))
        {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Determines whether <paramref name="text"/> reads like a department, division or function
/// (for example "Sales", "IT", "Human Resources") rather than an independent company name.
/// </summary>
/// <remarks>
/// Keywords are matched case-insensitively as whole words/phrases. Plain substring matching is
/// not safe here: "it" occurs inside "Smith", "Whitbread" and "Limited", "hr" inside "Chrysler",
/// and "unit" inside "United", which would class real company names as departments.
/// </remarks>
/// <param name="text">The candidate fragment (typically the part after "&amp;" / "and").</param>
/// <returns><c>true</c> when a department/role keyword is present as a whole word.</returns>
private static bool IsDepartmentOrRole(string text)
{
    if (string.IsNullOrWhiteSpace(text))
    {
        return false;
    }

    string[] departmentKeywords =
    [
        "department", "division", "team", "group", "unit",
        "services", "solutions", "operations", "logistics",
        "distribution", "manufacturing", "production",
        "marketing", "sales", "finance", "accounting",
        "hr", "human resources", "it", "technology",
        "research", "development", "r&d", "engineering",
        "retail", "wholesale", "stores", "online",
        "consulting", "advisory", "support"
    ];

    foreach (var keyword in departmentKeywords)
    {
        if (ContainsWholeWord(text, keyword))
        {
            return true;
        }
    }

    return false;

    // True when 'word' occurs in 'haystack' delimited by string edges or non-alphanumerics.
    static bool ContainsWholeWord(string haystack, string word)
    {
        var searchFrom = 0;
        while (searchFrom <= haystack.Length - word.Length)
        {
            var start = haystack.IndexOf(word, searchFrom, StringComparison.OrdinalIgnoreCase);
            if (start < 0)
            {
                return false;
            }

            var end = start + word.Length;
            var boundaryBefore = start == 0 || !char.IsLetterOrDigit(haystack[start - 1]);
            var boundaryAfter = end == haystack.Length || !char.IsLetterOrDigit(haystack[end]);
            if (boundaryBefore && boundaryAfter)
            {
                return true;
            }

            // Embedded in a longer word (e.g. "it" in "Smith"); try the next occurrence.
            searchFrom = start + 1;
        }

        return false;
    }
}
|
||||
|
||||
/// <summary>
/// Heuristic check that a fragment could stand on its own as a company name.
/// </summary>
/// <remarks>
/// A fragment qualifies when it is at least two characters long and either
/// (a) contains a corporate suffix (ltd, limited, plc, inc, corp, llp, llc, group, holdings) as a
/// whole word, or (b) starts with an upper-case letter and is at least three characters long.
/// Suffixes are matched case-insensitively with ordinal comparison and must be delimited by the
/// string edges or non-alphanumeric characters, so "zinc" is not mistaken for "... inc" and
/// " included" is not mistaken for " inc".
/// </remarks>
/// <param name="text">The candidate fragment.</param>
/// <returns><c>true</c> when the fragment looks like a company name.</returns>
private static bool LooksLikeCompanyName(string text)
{
    if (string.IsNullOrEmpty(text) || text.Length < 2)
    {
        return false;
    }

    // A corporate suffix is treated as conclusive regardless of capitalisation.
    string[] companySuffixes = ["ltd", "limited", "plc", "inc", "corp", "llp", "llc", "group", "holdings"];
    foreach (var suffix in companySuffixes)
    {
        if (ContainsWholeWord(text, suffix))
        {
            return true;
        }
    }

    // Otherwise fall back to "capitalised and of reasonable length".
    return char.IsUpper(text[0]) && text.Length >= 3;

    // True when 'word' occurs in 'haystack' delimited by string edges or non-alphanumerics.
    static bool ContainsWholeWord(string haystack, string word)
    {
        var searchFrom = 0;
        while (searchFrom <= haystack.Length - word.Length)
        {
            var start = haystack.IndexOf(word, searchFrom, StringComparison.OrdinalIgnoreCase);
            if (start < 0)
            {
                return false;
            }

            var end = start + word.Length;
            var boundaryBefore = start == 0 || !char.IsLetterOrDigit(haystack[start - 1]);
            var boundaryAfter = end == haystack.Length || !char.IsLetterOrDigit(haystack[end]);
            if (boundaryBefore && boundaryAfter)
            {
                return true;
            }

            searchFrom = start + 1;
        }

        return false;
    }
}
|
||||
}
|
||||
|
||||
@@ -171,8 +171,8 @@ public sealed class CVParserService : ICVParserService
|
||||
|
||||
var parameters = new MessageParameters
|
||||
{
|
||||
Model = "claude-sonnet-4-20250514",
|
||||
MaxTokens = 4096,
|
||||
Model = "claude-3-5-haiku-20241022",
|
||||
MaxTokens = 2048,
|
||||
Messages = messages,
|
||||
System = [new SystemMessage(SystemPrompt)]
|
||||
};
|
||||
|
||||
@@ -73,6 +73,127 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
"manufacturing", "operations", "trading"
|
||||
};
|
||||
|
||||
// "Safe expansion" words: words whose presence in a registered name is not counted as an
// "extra meaningful word" when comparing against the name written on the CV.
//   "Boots" -> "BOOTS UK LIMITED"       (UK + LIMITED)
//   "Boots" -> "THE BOOTS COMPANY PLC"  (THE + COMPANY + PLC)
// Lookups are case-insensitive (OrdinalIgnoreCase comparer).
// NOTE(review): the original comments say the legal and geographic words are also present in
// SkipWords; that set is not visible here - confirm before relying on it.
private static readonly HashSet<string> SafeExpansionWords =
    new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        // Legal structures
        "limited", "ltd", "plc", "llp", "llc",
        "inc", "incorporated", "corporation", "corp",
        "company", "co", "partners", "partnership",

        // Corporate structure words - they expand a name without changing identity
        "group", "holdings", "holding", "the",

        // Geographic qualifiers
        "uk", "u.k.", "gb", "britain", "british", "england",
        "europe", "european",
        "international", "global", "worldwide",

        // Common corporate suffixes, e.g. "BOOTS SERVICES LIMITED", "ASDA STORES LIMITED"
        "services", "solutions",
        "retail", "stores",
    };
|
||||
|
||||
// Well-known UK trading names/brands mapped to the registered company they are expected to resolve to.
// Key:   trading name as commonly written on a CV (case-insensitive lookup).
// Value: WellKnownBrand(primary company number, industry description, acceptable registered-name patterns).
// NOTE(review): the original comment describes this as a fast path that avoids full AI evaluation;
// the consuming code is not visible here - confirm how each field is used.
// Keys must be unique under the OrdinalIgnoreCase comparer: with index initialisers a key that
// differs only by case silently overwrites the earlier entry.
private static readonly Dictionary<string, WellKnownBrand> WellKnownBrands =
    new(StringComparer.OrdinalIgnoreCase)
    {
        // Retail
        ["Boots"] = new("00928555", "pharmacy/healthcare retail", new[] { "BOOTS UK", "BOOTS COMPANY", "BOOTS PLC", "BOOTS LIMITED" }),
        ["ASDA"] = new("00464777", "supermarket retail", new[] { "ASDA STORES", "ASDA GROUP", "ASDA PLC" }),
        ["Tesco"] = new("00445790", "supermarket retail", new[] { "TESCO PLC", "TESCO STORES", "TESCO UK" }),
        ["Sainsbury"] = new("00185647", "supermarket retail", new[] { "SAINSBURY", "J SAINSBURY" }),
        ["Sainsbury's"] = new("00185647", "supermarket retail", new[] { "SAINSBURY", "J SAINSBURY" }),
        ["Morrisons"] = new("00358949", "supermarket retail", new[] { "WM MORRISON", "MORRISON SUPERMARKETS" }),
        ["Waitrose"] = new("00099405", "supermarket retail", new[] { "WAITROSE", "JOHN LEWIS" }),
        ["Marks & Spencer"] = new("00214436", "retail", new[] { "MARKS AND SPENCER", "MARKS & SPENCER" }),
        ["M&S"] = new("00214436", "retail", new[] { "MARKS AND SPENCER", "MARKS & SPENCER" }),
        ["John Lewis"] = new("00233462", "retail", new[] { "JOHN LEWIS", "JOHN LEWIS PARTNERSHIP" }),
        ["Next"] = new("04425340", "retail", new[] { "NEXT PLC", "NEXT RETAIL", "NEXT GROUP" }),
        ["Primark"] = new("NI016270", "retail", new[] { "PRIMARK", "PENNEYS", "ASSOCIATED BRITISH FOODS" }),
        ["Argos"] = new("01081551", "retail", new[] { "ARGOS", "SAINSBURY'S ARGOS" }),

        // Finance & Insurance
        ["Legal & General"] = new("01417162", "insurance/financial services", new[] { "LEGAL AND GENERAL", "LEGAL & GENERAL", "L&G" }),
        ["Aviva"] = new("02468686", "insurance", new[] { "AVIVA", "NORWICH UNION" }),
        ["Prudential"] = new("01397169", "insurance/financial services", new[] { "PRUDENTIAL" }),
        ["AXA"] = new("01878835", "insurance", new[] { "AXA UK", "AXA INSURANCE" }),
        ["Lloyds Banking Group"] = new("00002065", "banking", new[] { "LLOYDS BANK", "LLOYDS BANKING" }),
        ["Barclays"] = new("01026167", "banking", new[] { "BARCLAYS BANK", "BARCLAYS PLC" }),
        ["HSBC"] = new("00014259", "banking", new[] { "HSBC BANK", "HSBC UK", "HSBC HOLDINGS" }),
        ["NatWest"] = new("00929027", "banking", new[] { "NATWEST", "NATIONAL WESTMINSTER", "NATWEST GROUP" }),

        // Professional Services
        ["EY"] = new("OC300001", "accounting/professional services", new[] { "ERNST & YOUNG", "EY LLP", "ERNST AND YOUNG" }),
        ["EY UK"] = new("OC300001", "accounting/professional services", new[] { "ERNST & YOUNG", "EY LLP" }),
        ["Ernst & Young"] = new("OC300001", "accounting/professional services", new[] { "ERNST & YOUNG", "EY LLP" }),
        ["PwC"] = new("OC303525", "accounting/professional services", new[] { "PRICEWATERHOUSECOOPERS", "PWC" }),
        ["Deloitte"] = new("OC303675", "accounting/professional services", new[] { "DELOITTE LLP", "DELOITTE" }),
        ["KPMG"] = new("OC301540", "accounting/professional services", new[] { "KPMG LLP", "KPMG" }),
        // TODO(review): "04abortedt6" is not a valid Companies House number (it looks like corrupted
        // placeholder text). Replace with the verified registration number before relying on it.
        ["Accenture"] = new("04abortedt6", "consulting", new[] { "ACCENTURE UK", "ACCENTURE" }),
        ["McKinsey"] = new("03883888", "consulting", new[] { "MCKINSEY", "MCKINSEY & COMPANY" }),

        // Technology
        ["Checkout.com"] = new("09131987", "fintech/payments", new[] { "CHECKOUT.COM", "CHECKOUT LTD", "CHECKOUT PAYMENTS" }),
        ["Revolut"] = new("08804411", "fintech", new[] { "REVOLUT LTD", "REVOLUT" }),
        ["Monzo"] = new("09446231", "fintech", new[] { "MONZO BANK", "MONZO" }),
        ["Wise"] = new("07209813", "fintech", new[] { "WISE", "TRANSFERWISE" }),
        ["TransferWise"] = new("07209813", "fintech", new[] { "WISE", "TRANSFERWISE" }),
        // NOTE(review): "ROO" is a very short pattern; if patterns are applied as substrings it may
        // match unrelated names - confirm against the matching code.
        ["Deliveroo"] = new("08167130", "food delivery", new[] { "DELIVEROO", "ROO" }),
        ["Just Eat"] = new("02465307", "food delivery", new[] { "JUST EAT", "JUST-EAT" }),
        ["IBM"] = new("00741598", "technology", new[] { "IBM", "IBM UK", "INTERNATIONAL BUSINESS MACHINES" }),
        ["IBM UK"] = new("00741598", "technology", new[] { "IBM", "IBM UK", "INTERNATIONAL BUSINESS MACHINES" }),
        ["JCB"] = new("00561597", "manufacturing/machinery", new[] { "JCB", "J C BAMFORD", "BAMFORD EXCAVATORS" }),
        // A single entry covers "Brewdog", "BrewDog", "BREWDOG", ... because keys are case-insensitive.
        ["BrewDog"] = new("SC311560", "brewing/hospitality", new[] { "BREWDOG", "BREW DOG" }),
        ["Cazoo"] = new("11043737", "automotive/retail", new[] { "CAZOO" }),
        ["Gymshark"] = new("08396100", "retail/fitness", new[] { "GYMSHARK", "GYM SHARK" }),

        // Telecoms & Media
        ["BT"] = new("01800000", "telecoms", new[] { "BT GROUP", "BT PLC", "BRITISH TELECOM" }),
        ["Vodafone"] = new("01471587", "telecoms", new[] { "VODAFONE", "VODAFONE UK", "VODAFONE GROUP" }),
        // NOTE(review): "BSkyB" is the only mixed-case pattern; if patterns are compared
        // case-sensitively against upper-case registered names it will never match - confirm.
        ["Sky"] = new("02247735", "media/telecoms", new[] { "SKY UK", "SKY LIMITED", "BSkyB" }),
        ["Virgin Media"] = new("02591237", "telecoms", new[] { "VIRGIN MEDIA", "VIRGIN MEDIA O2" }),

        // Airlines & Travel
        ["British Airways"] = new("01777777", "airline", new[] { "BRITISH AIRWAYS", "BA PLC" }),
        ["BA"] = new("01777777", "airline", new[] { "BRITISH AIRWAYS", "BA PLC" }),
        ["easyJet"] = new("03959649", "airline", new[] { "EASYJET", "EASY JET" }),
        // TODO(review): "01914abortedt" is not a valid Companies House number (it looks like corrupted
        // placeholder text). Replace with the verified registration number before relying on it.
        ["Ryanair"] = new("01914abortedt", "airline", new[] { "RYANAIR UK", "RYANAIR" }),

        // Energy
        ["BP"] = new("00102498", "oil & gas", new[] { "BP P.L.C.", "BP PLC", "BRITISH PETROLEUM" }),
        ["Shell"] = new("04366849", "oil & gas", new[] { "SHELL UK", "SHELL PLC", "ROYAL DUTCH SHELL" }),
        ["National Grid"] = new("04031152", "utilities", new[] { "NATIONAL GRID", "NATIONAL GRID PLC" }),
        ["SSE"] = new("SC117119", "utilities", new[] { "SSE PLC", "SSE ENERGY", "SCOTTISH AND SOUTHERN" }),
        ["Centrica"] = new("03033654", "utilities", new[] { "CENTRICA", "BRITISH GAS" }),

        // Pharma & Healthcare
        ["GSK"] = new("03888792", "pharmaceuticals", new[] { "GLAXOSMITHKLINE", "GSK PLC" }),
        ["GlaxoSmithKline"] = new("03888792", "pharmaceuticals", new[] { "GLAXOSMITHKLINE", "GSK" }),
        ["AstraZeneca"] = new("02723534", "pharmaceuticals", new[] { "ASTRAZENECA", "ASTRA ZENECA" }),

        // Manufacturing & Industrial
        ["Rolls-Royce"] = new("01003142", "aerospace/engineering", new[] { "ROLLS-ROYCE", "ROLLS ROYCE" }),
        ["BAE Systems"] = new("01470151", "defence/aerospace", new[] { "BAE SYSTEMS" }),
        ["Dyson"] = new("02023199", "manufacturing/technology", new[] { "DYSON", "DYSON TECHNOLOGY" }),

        // Automotive
        ["Jaguar Land Rover"] = new("01672070", "automotive", new[] { "JAGUAR LAND ROVER", "JLR" }),
        ["JLR"] = new("01672070", "automotive", new[] { "JAGUAR LAND ROVER" }),
    };
|
||||
|
||||
/// <summary>
/// Describes a well-known brand that can be recognised directly from the company name on a CV.
/// </summary>
/// <param name="PrimaryCompanyNumber">Company number used for the direct Companies House lookup when fast-tracking.</param>
/// <param name="Industry">Short industry label; surfaced in verification notes and passed to the AI matcher as a hint.</param>
/// <param name="AcceptablePatterns">Name fragments a candidate title may contain to be accepted as this brand.</param>
private sealed record WellKnownBrand(string PrimaryCompanyNumber, string Industry, string[] AcceptablePatterns);
|
||||
|
||||
|
||||
public CompanyVerifierService(
|
||||
CompaniesHouseClient companiesHouseClient,
|
||||
@@ -141,6 +262,94 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
};
|
||||
}
|
||||
|
||||
// Check 1c: Is this self-employment?
|
||||
if (IsSelfEmployment(normalizedName))
|
||||
{
|
||||
_logger.LogInformation("Recognised self-employment: {CompanyName}", companyName);
|
||||
return new CompanyVerificationResult
|
||||
{
|
||||
ClaimedCompany = companyName,
|
||||
MatchedCompanyName = companyName,
|
||||
MatchedCompanyNumber = null,
|
||||
MatchScore = 100,
|
||||
IsVerified = true,
|
||||
VerificationNotes = "Self-employed / freelance - not a registered company",
|
||||
ClaimedStartDate = startDate,
|
||||
ClaimedEndDate = endDate,
|
||||
CompanyType = "self-employed",
|
||||
CompanyStatus = "active",
|
||||
ClaimedJobTitle = jobTitle,
|
||||
Flags = flags
|
||||
};
|
||||
}
|
||||
|
||||
// Check 1d: Is this an overseas/international company?
|
||||
var overseasInfo = DetectOverseasCompany(normalizedName);
|
||||
if (overseasInfo is not null)
|
||||
{
|
||||
_logger.LogInformation("Recognised overseas company: {CompanyName} ({Country})", companyName, overseasInfo.Value.Country);
|
||||
return new CompanyVerificationResult
|
||||
{
|
||||
ClaimedCompany = companyName,
|
||||
MatchedCompanyName = companyName,
|
||||
MatchedCompanyNumber = null,
|
||||
MatchScore = 100,
|
||||
IsVerified = true,
|
||||
VerificationNotes = $"Overseas company ({overseasInfo.Value.Country}) - not registered at UK Companies House",
|
||||
ClaimedStartDate = startDate,
|
||||
ClaimedEndDate = endDate,
|
||||
CompanyType = "overseas",
|
||||
CompanyStatus = "active",
|
||||
ClaimedJobTitle = jobTitle,
|
||||
Flags = flags
|
||||
};
|
||||
}
|
||||
|
||||
// Check 1e: Is this a well-known brand we can fast-track verify?
|
||||
var knownBrand = GetWellKnownBrand(normalizedName);
|
||||
if (knownBrand != null && !string.IsNullOrEmpty(knownBrand.PrimaryCompanyNumber))
|
||||
{
|
||||
_logger.LogInformation("Fast-track verifying well-known brand '{CompanyName}' -> company #{CompanyNumber}",
|
||||
companyName, knownBrand.PrimaryCompanyNumber);
|
||||
|
||||
// Look up the company directly from Companies House
|
||||
try
|
||||
{
|
||||
var companyDetails = await _companiesHouseClient.GetCompanyAsync(knownBrand.PrimaryCompanyNumber);
|
||||
if (companyDetails != null)
|
||||
{
|
||||
DateOnly? incorporationDate = null;
|
||||
if (!string.IsNullOrEmpty(companyDetails.DateOfCreation) &&
|
||||
DateOnly.TryParse(companyDetails.DateOfCreation, out var parsedDate))
|
||||
{
|
||||
incorporationDate = parsedDate;
|
||||
}
|
||||
|
||||
return new CompanyVerificationResult
|
||||
{
|
||||
ClaimedCompany = companyName,
|
||||
MatchedCompanyName = companyDetails.CompanyName,
|
||||
MatchedCompanyNumber = knownBrand.PrimaryCompanyNumber,
|
||||
MatchScore = 100,
|
||||
IsVerified = true,
|
||||
VerificationNotes = $"Well-known brand ({knownBrand.Industry})",
|
||||
ClaimedStartDate = startDate,
|
||||
ClaimedEndDate = endDate,
|
||||
CompanyType = companyDetails.Type,
|
||||
CompanyStatus = companyDetails.CompanyStatus,
|
||||
IncorporationDate = incorporationDate,
|
||||
ClaimedJobTitle = jobTitle,
|
||||
Flags = flags
|
||||
};
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "Failed to fast-track verify well-known brand '{CompanyName}', falling back to search", companyName);
|
||||
// Fall through to normal search
|
||||
}
|
||||
}
|
||||
|
||||
// Check 2: Is this an internal division of a larger company?
|
||||
var parentCompany = UKHistoricalEmployers.GetParentCompanyForDivision(normalizedName);
|
||||
if (parentCompany != null)
|
||||
@@ -303,10 +512,65 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
// Use AI to find the best semantic match from all candidates
|
||||
_logger.LogDebug("Using AI to match '{CompanyName}' against {Count} candidates", normalizedName, allCandidates.Count);
|
||||
|
||||
// Sort candidates by fuzzy relevance to the search term before taking top 10
|
||||
// This ensures the most likely matches are sent to the AI, not just arbitrary entries
|
||||
// Check if this is a well-known brand - if so, we can be more lenient with matching
|
||||
var wellKnownBrand = GetWellKnownBrand(normalizedName);
|
||||
if (wellKnownBrand != null)
|
||||
{
|
||||
_logger.LogDebug("Recognised well-known brand '{CompanyName}' (industry: {Industry})",
|
||||
normalizedName, wellKnownBrand.Industry);
|
||||
}
|
||||
|
||||
// Extract core identifiers from the original company name
|
||||
var originalCoreWords = ExtractCoreIdentifiers(normalizedName);
|
||||
var normalizedUpper = normalizedName.ToUpperInvariant();
|
||||
var candidatesForAI = allCandidates.Values
|
||||
|
||||
// Pre-filter candidates: reject those missing significant core words from the original
|
||||
// This prevents "Northwick Industries Limited" matching "NORTHWICK LIMITED" (missing INDUSTRIES)
|
||||
// BUT: for well-known brands or candidates with only safe expansions, we're more lenient
|
||||
var filteredCandidates = allCandidates.Values
|
||||
.Where(c =>
|
||||
{
|
||||
// All original core words must appear in the candidate
|
||||
var candidateTitle = c.Title.ToUpperInvariant();
|
||||
var hasAllCores = originalCoreWords.Count == 0 ||
|
||||
originalCoreWords.All(w => candidateTitle.Contains(w));
|
||||
|
||||
if (!hasAllCores)
|
||||
{
|
||||
// For well-known brands, check if candidate matches acceptable patterns
|
||||
if (wellKnownBrand != null && MatchesWellKnownBrandPatterns(c.Title, wellKnownBrand))
|
||||
{
|
||||
_logger.LogDebug("Pre-filter allowing '{Candidate}' - matches well-known brand pattern for '{Original}'",
|
||||
c.Title, normalizedName);
|
||||
return true;
|
||||
}
|
||||
|
||||
_logger.LogDebug("Pre-filter rejected '{Candidate}' - missing core words from '{Original}'. " +
|
||||
"Required: [{Required}]", c.Title, normalizedName, string.Join(", ", originalCoreWords));
|
||||
return false;
|
||||
}
|
||||
|
||||
// For candidates that have all core words, check if extra words are safe
|
||||
// This is a RELAXED filter for AI candidates - we let the AI make the final call
|
||||
// But we still log for debugging
|
||||
var candidateCores = ExtractCoreIdentifiers(c.Title);
|
||||
var meaningfulExtras = CountMeaningfulExtraWords(originalCoreWords, candidateCores);
|
||||
if (meaningfulExtras > 0)
|
||||
{
|
||||
_logger.LogDebug("Pre-filter note: '{Candidate}' has {ExtraCount} meaningful extra words vs '{Original}'",
|
||||
c.Title, meaningfulExtras, normalizedName);
|
||||
}
|
||||
|
||||
return true; // Let AI evaluate candidates with extra words
|
||||
})
|
||||
.ToList();
|
||||
|
||||
_logger.LogDebug("Pre-filtered {Original} candidates to {Filtered} candidates",
|
||||
allCandidates.Count, filteredCandidates.Count);
|
||||
|
||||
// Sort remaining candidates by fuzzy relevance to the search term before taking top 10
|
||||
// This ensures the most likely matches are sent to the AI, not just arbitrary entries
|
||||
var candidatesForAI = filteredCandidates
|
||||
.Select(c => new
|
||||
{
|
||||
Item = c,
|
||||
@@ -326,7 +590,9 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
_logger.LogDebug("Top candidates for AI matching (sorted by relevance): {Candidates}",
|
||||
string.Join(", ", candidatesForAI.Select(c => $"{c.CompanyName} [{c.CompanyNumber}]")));
|
||||
|
||||
var aiResult = await _aiMatcher.FindBestMatchAsync(normalizedName, candidatesForAI);
|
||||
// Pass industry context if this is a well-known brand
|
||||
var industryHint = wellKnownBrand?.Industry;
|
||||
var aiResult = await _aiMatcher.FindBestMatchAsync(normalizedName, candidatesForAI, industryHint);
|
||||
|
||||
CompaniesHouseSearchItem? matchedItem = null;
|
||||
int matchScore;
|
||||
@@ -350,10 +616,9 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
// AI didn't find a match - check if it explicitly rejected or just failed
|
||||
if (aiResult?.MatchType == "NoMatch")
|
||||
{
|
||||
// AI explicitly rejected. Only override if fuzzy match passes strict validation:
|
||||
// 1. High fuzzy score (>= 90%)
|
||||
// 2. ALL core identifying words from original name appear in the match
|
||||
// 3. Match doesn't have significantly more core words (prevents partial word matches)
|
||||
// AI explicitly rejected. Only override if fuzzy match passes STRICT validation.
|
||||
// We trust the AI's judgment - only override in clear-cut cases where
|
||||
// the fuzzy match is essentially identical to the original OR has only safe expansions.
|
||||
if (bestFuzzy.HasValue && bestFuzzy.Value.Score >= 90)
|
||||
{
|
||||
var originalCores = ExtractCoreIdentifiers(normalizedName);
|
||||
@@ -363,18 +628,33 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
var allCoresPresent = originalCores.Count == 0 ||
|
||||
originalCores.All(c => bestFuzzy.Value.Item.Title.Contains(c, StringComparison.OrdinalIgnoreCase));
|
||||
|
||||
// Match shouldn't have too many extra core words (max 2 extra, e.g., "GROUP PLC")
|
||||
var extraCores = matchCores.Count(c => !originalCores.Any(o =>
|
||||
c.Equals(o, StringComparison.OrdinalIgnoreCase)));
|
||||
var reasonableExtras = extraCores <= 2;
|
||||
// Count MEANINGFUL extra core words (excluding safe expansions like UK, LIMITED, COMPANY)
|
||||
var meaningfulExtras = CountMeaningfulExtraWords(originalCores, matchCores);
|
||||
|
||||
if (allCoresPresent && reasonableExtras)
|
||||
// For short company names (1-2 core words), no meaningful extras allowed
|
||||
// For longer names, allow up to 1 meaningful extra
|
||||
// But if it's a well-known brand matching an acceptable pattern, allow more
|
||||
var isShortName = originalCores.Count <= 2;
|
||||
var maxAllowedExtras = isShortName ? 0 : 1;
|
||||
|
||||
// Well-known brand override: if the match fits acceptable patterns, allow it
|
||||
var brandOverride = wellKnownBrand != null &&
|
||||
MatchesWellKnownBrandPatterns(bestFuzzy.Value.Item.Title, wellKnownBrand);
|
||||
|
||||
var reasonableExtras = meaningfulExtras <= maxAllowedExtras || brandOverride;
|
||||
|
||||
// Additional check: if match has significantly MORE core words than original,
|
||||
// it's likely a different company entirely
|
||||
var coreDifference = matchCores.Count - originalCores.Count;
|
||||
var acceptableCoreDifference = coreDifference <= 2; // Allow 2 extra total (could be safe expansions)
|
||||
|
||||
if (allCoresPresent && reasonableExtras && acceptableCoreDifference)
|
||||
{
|
||||
_logger.LogInformation(
|
||||
"AI rejected '{CompanyName}' but fuzzy match '{MatchedName}' ({Score}%) passes validation. " +
|
||||
"Original cores: [{OriginalCores}], Match cores: [{MatchCores}]",
|
||||
"Original cores: [{OriginalCores}], Match cores: [{MatchCores}], MeaningfulExtras: {Extra}, BrandOverride: {Override}",
|
||||
normalizedName, bestFuzzy.Value.Item.Title, bestFuzzy.Value.Score,
|
||||
string.Join(", ", originalCores), string.Join(", ", matchCores));
|
||||
string.Join(", ", originalCores), string.Join(", ", matchCores), meaningfulExtras, brandOverride);
|
||||
matchedItem = bestFuzzy.Value.Item;
|
||||
matchScore = bestFuzzy.Value.Score;
|
||||
}
|
||||
@@ -382,8 +662,8 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
{
|
||||
_logger.LogDebug(
|
||||
"AI rejected '{CompanyName}' and fuzzy match '{MatchedName}' fails validation. " +
|
||||
"AllCoresPresent: {AllCores}, ExtraCores: {Extra}",
|
||||
normalizedName, bestFuzzy.Value.Item.Title, allCoresPresent, extraCores);
|
||||
"AllCoresPresent: {AllCores}, MeaningfulExtras: {Extra} (max: {MaxAllowed}), CoreDiff: {CoreDiff}, BrandOverride: {Override}",
|
||||
normalizedName, bestFuzzy.Value.Item.Title, allCoresPresent, meaningfulExtras, maxAllowedExtras, coreDifference, brandOverride);
|
||||
return CreateUnverifiedResult(companyName, startDate, endDate, jobTitle,
|
||||
"Company name could not be verified - no matching company found in official records");
|
||||
}
|
||||
@@ -841,6 +1121,162 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
return normalized;
|
||||
}
|
||||
|
||||
// Names that, taken as the whole "employer" string, denote self-employment.
// Built once; the comparer makes the lookup case-insensitive.
private static readonly HashSet<string> SelfEmploymentTerms = new(StringComparer.OrdinalIgnoreCase)
{
    "self-employed", "self employed", "selfemployed",
    "freelance", "freelancer", "free-lance", "free lance",
    "contractor", "independent contractor",
    "sole trader", "sole-trader", "soletrader",
    "consultant", "independent consultant",
    "self", "myself", "own business", "own company",
    "private practice", "private consultancy",
    "portfolio career", "various clients",
    "contract work", "contracting"
};

/// <summary>
/// Detects whether a company name from a CV indicates self-employment or freelance work.
/// </summary>
/// <param name="companyName">The employer name to inspect; null or blank yields <c>false</c>.</param>
/// <returns>
/// <c>true</c> when the trimmed name is one of the known self-employment terms, starts with
/// "self-employed" / "self employed" / "freelance" / "free-lance", ends with "(self-employed)" or
/// "(freelance)", or contains "self-employed as" / "freelancing as"; otherwise <c>false</c>.
/// </returns>
private static bool IsSelfEmployment(string companyName)
{
    if (string.IsNullOrWhiteSpace(companyName))
    {
        return false;
    }

    var name = companyName.Trim();

    // Whole-name matches.
    if (SelfEmploymentTerms.Contains(name))
    {
        return true;
    }

    // Pattern matches. Ordinal comparisons are used deliberately: these are fixed keywords,
    // not linguistic text, so the result must not vary with the current culture.
    const StringComparison IgnoreCase = StringComparison.OrdinalIgnoreCase;

    return name.StartsWith("self-employed", IgnoreCase)
        || name.StartsWith("self employed", IgnoreCase)
        || name.StartsWith("freelance", IgnoreCase)
        || name.StartsWith("free-lance", IgnoreCase)
        || name.EndsWith("(self-employed)", IgnoreCase)
        || name.EndsWith("(freelance)", IgnoreCase)
        || name.Contains("self-employed as", IgnoreCase)
        || name.Contains("freelancing as", IgnoreCase);
}
|
||||
|
||||
// Ordered (indicator terms, country) table for non-UK companies.
// Order matters: entries are scanned top to bottom and the first match wins.
private static readonly (string[] Terms, string Country)[] OverseasCountryTerms =
{
    // North America
    (new[] { "canada", "canadian" }, "Canada"),
    (new[] { "usa", "u.s.a.", "u.s.", "united states", "american", "america" }, "United States"),
    (new[] { "mexico", "mexican" }, "Mexico"),

    // Europe (non-UK)
    (new[] { "ireland", "irish", "eire", "dublin" }, "Ireland"),
    (new[] { "france", "french", "paris" }, "France"),
    (new[] { "germany", "german", "deutsche", "berlin", "munich" }, "Germany"),
    (new[] { "spain", "spanish", "madrid", "barcelona" }, "Spain"),
    (new[] { "italy", "italian", "milan", "rome" }, "Italy"),
    (new[] { "netherlands", "dutch", "holland", "amsterdam" }, "Netherlands"),
    (new[] { "belgium", "belgian", "brussels" }, "Belgium"),
    (new[] { "switzerland", "swiss", "zurich", "geneva" }, "Switzerland"),
    (new[] { "austria", "austrian", "vienna" }, "Austria"),
    (new[] { "sweden", "swedish", "stockholm" }, "Sweden"),
    (new[] { "norway", "norwegian", "oslo" }, "Norway"),
    (new[] { "denmark", "danish", "copenhagen" }, "Denmark"),
    (new[] { "finland", "finnish", "helsinki" }, "Finland"),
    (new[] { "poland", "polish", "warsaw" }, "Poland"),
    (new[] { "portugal", "portuguese", "lisbon" }, "Portugal"),
    (new[] { "greece", "greek", "athens" }, "Greece"),
    (new[] { "cyprus", "cypriot", "nicosia" }, "Cyprus"),
    (new[] { "czech", "prague" }, "Czech Republic"),
    (new[] { "hungary", "hungarian", "budapest" }, "Hungary"),
    (new[] { "romania", "romanian", "bucharest" }, "Romania"),

    // Asia Pacific
    (new[] { "australia", "australian", "sydney", "melbourne" }, "Australia"),
    (new[] { "new zealand", "nz", "auckland", "wellington" }, "New Zealand"),
    (new[] { "japan", "japanese", "tokyo" }, "Japan"),
    (new[] { "china", "chinese", "beijing", "shanghai", "hong kong" }, "China"),
    (new[] { "india", "indian", "mumbai", "delhi", "bangalore" }, "India"),
    (new[] { "singapore", "singaporean" }, "Singapore"),
    (new[] { "malaysia", "malaysian", "kuala lumpur" }, "Malaysia"),
    (new[] { "korea", "korean", "seoul" }, "South Korea"),
    (new[] { "taiwan", "taiwanese", "taipei" }, "Taiwan"),
    (new[] { "thailand", "thai", "bangkok" }, "Thailand"),
    (new[] { "philippines", "filipino", "manila" }, "Philippines"),
    (new[] { "indonesia", "indonesian", "jakarta" }, "Indonesia"),
    (new[] { "vietnam", "vietnamese", "hanoi", "ho chi minh" }, "Vietnam"),

    // Middle East & Africa
    (new[] { "uae", "u.a.e.", "dubai", "abu dhabi", "emirates" }, "UAE"),
    (new[] { "saudi", "riyadh", "jeddah" }, "Saudi Arabia"),
    (new[] { "qatar", "doha" }, "Qatar"),
    (new[] { "israel", "israeli", "tel aviv" }, "Israel"),
    (new[] { "south africa", "johannesburg", "cape town" }, "South Africa"),
    (new[] { "egypt", "egyptian", "cairo" }, "Egypt"),
    (new[] { "nigeria", "nigerian", "lagos" }, "Nigeria"),
    (new[] { "kenya", "kenyan", "nairobi" }, "Kenya"),

    // South America
    (new[] { "brazil", "brazilian", "sao paulo", "rio" }, "Brazil"),
    (new[] { "argentina", "argentine", "buenos aires" }, "Argentina"),
    (new[] { "chile", "chilean", "santiago" }, "Chile"),
    (new[] { "colombia", "colombian", "bogota" }, "Colombia"),
};

// Phrases that mark international / non-company organisations. The trailing space on
// "un " and "who " is intentional: the acronym must be followed by another word.
private static readonly string[] InternationalOrganisationPatterns =
{
    "national guard", "armed forces", "military", "army", "navy", "air force",
    "embassy", "consulate", "foreign ministry",
    "max planck", "fraunhofer", "cnrs", "csiro", "nasa", "esa",
    "world bank", "imf", "united nations", "un ", "nato", "who ", "unesco"
};

// Location suffix words that, following a country term, indicate an overseas site.
private static readonly string[] OverseasSiteWords = { "office", "branch", "division", "operations" };

/// <summary>
/// Detects whether a company name indicates an overseas/international organisation.
/// </summary>
/// <remarks>
/// Three kinds of evidence are checked, in this order for each country term:
/// the name ends with the term (optionally in parentheses, or after " - " / ", ");
/// the name contains "&lt;term&gt; office/branch/division/operations"; and finally the name
/// contains one of the international organisation phrases. All phrase checks require the
/// phrase to start at a word boundary so that, for example, "Sun Microsystems" is not
/// mistaken for the "un " pattern.
/// </remarks>
/// <param name="companyName">The company name to inspect; null or blank yields <c>null</c>.</param>
/// <returns>
/// The detected country (or "International") together with a base name. For suffix matches
/// the base name is the input with the country suffix and any trailing "(", "-" or ","
/// removed; for all other matches it is the input unchanged. <c>null</c> when nothing matches.
/// </returns>
private static (string Country, string BaseName)? DetectOverseasCompany(string companyName)
{
    if (string.IsNullOrWhiteSpace(companyName))
    {
        return null;
    }

    var lower = companyName.ToLowerInvariant();

    foreach (var (terms, country) in OverseasCountryTerms)
    {
        foreach (var term in terms)
        {
            // Suffix forms: "BMW Group Canada", "BMW - Canada", "BMW, Canada", "BMW (Canada)".
            // The " term" check also covers the dash and comma variants; the separator is
            // trimmed from the base name below.
            var suffixLength = 0;
            if (lower.EndsWith(" " + term, StringComparison.Ordinal))
            {
                suffixLength = term.Length + 1;
            }
            else if (lower.EndsWith("(" + term + ")", StringComparison.Ordinal))
            {
                suffixLength = term.Length + 2;
            }

            if (suffixLength > 0)
            {
                // Slice by the actual suffix length rather than assuming a fixed number of
                // separator characters in front of the term.
                var baseName = companyName[..^suffixLength].Trim().TrimEnd('(', '-', ',').Trim();
                return (country, baseName);
            }

            // "Country Office" / "Country Branch" style patterns anywhere in the name.
            foreach (var site in OverseasSiteWords)
            {
                if (ContainsOverseasPhrase(lower, term + " " + site, requireTrailingBoundary: false))
                {
                    return (country, companyName);
                }
            }
        }
    }

    foreach (var pattern in InternationalOrganisationPatterns)
    {
        if (ContainsOverseasPhrase(lower, pattern, requireTrailingBoundary: true))
        {
            return ("International", companyName);
        }
    }

    return null;
}

// Returns true when `phrase` occurs in `text` starting at a word boundary (start of text or
// after a non-alphanumeric character). When requireTrailingBoundary is set and the phrase ends
// in a letter or digit, the character after the occurrence must also be non-alphanumeric.
private static bool ContainsOverseasPhrase(string text, string phrase, bool requireTrailingBoundary)
{
    var searchFrom = 0;
    while (searchFrom <= text.Length - phrase.Length)
    {
        var index = text.IndexOf(phrase, searchFrom, StringComparison.Ordinal);
        if (index < 0)
        {
            return false;
        }

        var end = index + phrase.Length;
        var leadingOk = index == 0 || !char.IsLetterOrDigit(text[index - 1]);
        var trailingOk = !requireTrailingBoundary
            || end == text.Length
            || !char.IsLetterOrDigit(phrase[^1])
            || !char.IsLetterOrDigit(text[end]);

        if (leadingOk && trailingOk)
        {
            return true;
        }

        searchFrom = index + 1;
    }

    return false;
}
|
||||
|
||||
/// <summary>
|
||||
/// Attempts to verify compound company names by detecting if multiple companies are mentioned.
|
||||
/// Only triggers for names with potential separators (/, &, "and") to avoid unnecessary AI calls.
|
||||
@@ -920,8 +1356,18 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
return null;
|
||||
}
|
||||
|
||||
// Extract core identifiers from the original company name
|
||||
var originalCoreWords = ExtractCoreIdentifiers(companyName);
|
||||
|
||||
var matches = cachedCompanies
|
||||
.Where(c => !string.IsNullOrWhiteSpace(c.CompanyName))
|
||||
.Where(c =>
|
||||
{
|
||||
// All original core words must appear in the cached company name
|
||||
var cachedTitle = c.CompanyName.ToUpperInvariant();
|
||||
return originalCoreWords.Count == 0 ||
|
||||
originalCoreWords.All(w => cachedTitle.Contains(w));
|
||||
})
|
||||
.Select(c => new { Company = c, Score = Fuzz.TokenSetRatio(companyName.ToUpperInvariant(), c.CompanyName.ToUpperInvariant()) })
|
||||
.Where(m => m.Score >= FuzzyMatchThreshold)
|
||||
.OrderByDescending(m => m.Score)
|
||||
@@ -962,20 +1408,29 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
var itemTitleLower = item.Title.ToLowerInvariant();
|
||||
var itemCoreWords = ExtractCoreIdentifiers(item.Title);
|
||||
|
||||
// Validate that ALL core identifiers appear in the match
|
||||
// "Lloyds Bowmaker" must have BOTH "LLOYDS" and "BOWMAKER" in the match
|
||||
// Validate that ALL core identifiers from the ORIGINAL company name appear in the match
|
||||
// "Northwick Industries Limited" must have BOTH "NORTHWICK" and "INDUSTRIES" in the match
|
||||
// This prevents partial search queries (e.g., "Northwick") from bypassing validation
|
||||
var hasAllOriginalCores = coreWords.Count == 0 || coreWords.All(w => itemTitle.Contains(w));
|
||||
var hasAllQueryCores = queryCoreWords.Count == 0 || queryCoreWords.All(w => itemTitle.Contains(w));
|
||||
if (!hasAllOriginalCores && !hasAllQueryCores) return false;
|
||||
if (!hasAllOriginalCores) return false;
|
||||
|
||||
// Additional check: ensure the match doesn't have too many EXTRA core words
|
||||
// Additional check: ensure the match doesn't have too many EXTRA MEANINGFUL core words
|
||||
// "Families First" should NOT match "Families Against Conformity" because
|
||||
// "Against" and "Conformity" are extra significant words
|
||||
// BUT: safe expansions like "UK", "LIMITED", "COMPANY", "GROUP" don't count
|
||||
// So "Boots" -> "BOOTS UK LIMITED" is OK (no meaningful extras)
|
||||
// But "Boots" -> "BOOTS AND BEARDS" is NOT OK (BEARDS is meaningful extra)
|
||||
if (coreWords.Count > 0 && hasAllOriginalCores)
|
||||
{
|
||||
var extraWordsInMatch = itemCoreWords.Count(w => !coreWords.Contains(w));
|
||||
// If the match has more than 1 extra core word, it's likely a different company
|
||||
if (extraWordsInMatch > 1 && itemCoreWords.Count > coreWords.Count + 1)
|
||||
// Count only MEANINGFUL extra words (not safe expansions)
|
||||
var meaningfulExtras = CountMeaningfulExtraWords(coreWords, itemCoreWords);
|
||||
var isShortName = coreWords.Count <= 2;
|
||||
|
||||
// For short names: no meaningful extras allowed (prevents "Boots" → "BOOTS AND BEARDS")
|
||||
// For longer names: allow up to 1 meaningful extra
|
||||
var maxAllowedExtras = isShortName ? 0 : 1;
|
||||
|
||||
if (meaningfulExtras > maxAllowedExtras)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
@@ -1475,6 +1930,88 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
return System.Text.RegularExpressions.Regex.IsMatch(text, pattern, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
|
||||
}
|
||||
|
||||
// Trailing qualifiers that may be stacked after a brand name on a CV ("Boots UK Ltd").
private static readonly string[] WellKnownBrandSuffixes = { " Ltd", " Limited", " PLC", " UK", " Group" };

/// <summary>
/// Looks up a CV company name in the well-known brand table.
/// </summary>
/// <remarks>
/// The trimmed name is tried first. If it is not found, trailing qualifiers
/// (" Ltd", " Limited", " PLC", " UK", " Group") are removed one at a time, retrying the lookup
/// after each removal, so stacked qualifiers such as "Boots UK Ltd" still resolve to "Boots".
/// </remarks>
/// <param name="companyName">The company name as written on the CV; null or blank yields <c>null</c>.</param>
/// <returns>The matching brand entry, or <c>null</c> when no form of the name is in the table.</returns>
private static WellKnownBrand? GetWellKnownBrand(string companyName)
{
    if (string.IsNullOrWhiteSpace(companyName))
    {
        return null;
    }

    var candidate = companyName.Trim();

    while (true)
    {
        if (WellKnownBrands.TryGetValue(candidate, out var brand))
        {
            return brand;
        }

        // Find the qualifier (if any) at the end of the current candidate.
        string? trailing = null;
        foreach (var suffix in WellKnownBrandSuffixes)
        {
            if (candidate.EndsWith(suffix, StringComparison.OrdinalIgnoreCase))
            {
                trailing = suffix;
                break;
            }
        }

        if (trailing is null)
        {
            return null;
        }

        // Each pass strictly shortens the candidate, so the loop always terminates.
        candidate = candidate[..^trailing.Length].Trim();
    }
}
|
||||
|
||||
/// <summary>
/// Checks whether a candidate company name contains one of a well-known brand's acceptable patterns.
/// </summary>
/// <remarks>
/// Matching is case-insensitive and requires the pattern to appear as whole words: the
/// characters immediately before and after the occurrence must not be letters or digits.
/// This stops short patterns from matching inside unrelated names (for example "ROO" inside
/// "BROOKS" or "WISE" inside "OTHERWISE").
/// </remarks>
/// <param name="candidateName">The candidate company title to test; null or blank yields <c>false</c>.</param>
/// <param name="brand">The brand whose <c>AcceptablePatterns</c> are tested.</param>
/// <returns><c>true</c> if any non-empty pattern occurs in the candidate on word boundaries.</returns>
private static bool MatchesWellKnownBrandPatterns(string candidateName, WellKnownBrand brand)
{
    if (string.IsNullOrWhiteSpace(candidateName) || brand?.AcceptablePatterns is null)
    {
        return false;
    }

    return brand.AcceptablePatterns.Any(pattern =>
        !string.IsNullOrEmpty(pattern) && ContainsBrandPattern(candidateName, pattern));

    // True when `pattern` occurs in `text` (ignoring case) delimited by non-alphanumeric
    // characters or the ends of the string. A boundary is not demanded on a side where the
    // pattern itself starts/ends with punctuation (e.g. "BP P.L.C.").
    static bool ContainsBrandPattern(string text, string pattern)
    {
        var searchFrom = 0;
        while (searchFrom <= text.Length - pattern.Length)
        {
            var index = text.IndexOf(pattern, searchFrom, StringComparison.OrdinalIgnoreCase);
            if (index < 0)
            {
                return false;
            }

            var end = index + pattern.Length;
            var leadingOk = index == 0
                || !char.IsLetterOrDigit(pattern[0])
                || !char.IsLetterOrDigit(text[index - 1]);
            var trailingOk = end == text.Length
                || !char.IsLetterOrDigit(pattern[^1])
                || !char.IsLetterOrDigit(text[end]);

            if (leadingOk && trailingOk)
            {
                return true;
            }

            searchFrom = index + 1;
        }

        return false;
    }
}
|
||||
|
||||
/// <summary>
/// Counts the candidate's core words that are neither present in the original's core words
/// (compared case-insensitively) nor listed in <c>SafeExpansionWords</c>.
/// </summary>
/// <param name="originalCoreWords">Core words extracted from the original (CV) company name.</param>
/// <param name="candidateCoreWords">Core words extracted from the candidate company name.</param>
/// <returns>The number of "meaningful" extra words found in the candidate.</returns>
private static int CountMeaningfulExtraWords(List<string> originalCoreWords, List<string> candidateCoreWords)
{
    bool AppearsInOriginal(string word) =>
        originalCoreWords.Any(known => known.Equals(word, StringComparison.OrdinalIgnoreCase));

    return candidateCoreWords.Count(word => !AppearsInOriginal(word) && !SafeExpansionWords.Contains(word));
}
|
||||
|
||||
/// <summary>
/// Reports whether every core word the candidate adds on top of the original is a safe
/// expansion, i.e. a word listed in <c>SafeExpansionWords</c>.
/// E.g., "Boots" -> "BOOTS UK LIMITED" adds only safe words (UK, LIMITED);
/// "Boots" -> "BOOTS AND BEARDS" adds an unsafe word (BEARDS).
/// </summary>
/// <param name="originalName">The original company name.</param>
/// <param name="candidateName">The candidate company name being compared against it.</param>
/// <param name="logger">Optional logger; when supplied, unsafe extra words are logged at debug level.</param>
/// <returns><c>true</c> when the candidate adds no unsafe words; otherwise <c>false</c>.</returns>
private static bool HasOnlySafeExpansions(string originalName, string candidateName, ILogger? logger = null)
{
    var knownWords = ExtractCoreIdentifiers(originalName);

    // Sort each word the candidate adds into the safe or unsafe bucket.
    var safeExtras = new List<string>();
    var unsafeExtras = new List<string>();

    foreach (var word in ExtractCoreIdentifiers(candidateName))
    {
        var alreadyKnown = knownWords.Any(known => known.Equals(word, StringComparison.OrdinalIgnoreCase));
        if (alreadyKnown)
        {
            continue;
        }

        if (SafeExpansionWords.Contains(word))
        {
            safeExtras.Add(word);
        }
        else
        {
            unsafeExtras.Add(word);
        }
    }

    if (unsafeExtras.Count == 0)
    {
        return true;
    }

    if (logger != null)
    {
        logger.LogDebug("Candidate '{Candidate}' has unsafe extra words: [{Unsafe}] (safe extras: [{Safe}])",
            candidateName,
            string.Join(", ", unsafeExtras),
            string.Join(", ", safeExtras));
    }

    return false;
}
|
||||
|
||||
// Expanded skip words list for core identifier extraction
|
||||
// These words are too common to be meaningful differentiators between companies
|
||||
private static readonly HashSet<string> SkipWords = new(StringComparer.OrdinalIgnoreCase)
|
||||
@@ -1502,17 +2039,20 @@ public sealed class CompanyVerifierService : ICompanyVerifierService
|
||||
"company", "co", "partners", "partnership", "enterprises", "unlimited",
|
||||
"registered", "cic", "cio", "se", "ag", "gmbh", "sarl", "bv", "nv",
|
||||
|
||||
// Business descriptors
|
||||
// Business descriptors - only truly generic ones that don't identify the business type
|
||||
// Note: Removed words that can be meaningful business type identifiers:
|
||||
// - "industries", "technology", "solutions", "services", "consulting" - identify business type
|
||||
// - e.g., "Paramount Consulting" ≠ "Paramount", "Tech Solutions" ≠ "Tech"
|
||||
"group", "holdings", "holding", "parent", "subsidiary", "division", "branch",
|
||||
"services", "service", "solutions", "solution", "consulting", "consultants", "consultancy",
|
||||
"management", "systems", "system", "technologies", "technology", "tech",
|
||||
"industries", "industry", "industrial", "commercial", "trading", "trade",
|
||||
"business", "businesses", "operations", "operational", "professional", "professionals",
|
||||
"resources", "resource", "network", "networks", "associates", "associated",
|
||||
"commercial", "trading", "trade",
|
||||
"business", "businesses", "operational",
|
||||
"associated",
|
||||
|
||||
// Size/Scope descriptors
|
||||
"national", "international", "global", "worldwide", "world", "regional", "local",
|
||||
"universal", "general", "standard", "premier", "prime", "first", "one",
|
||||
"universal", "standard", "prime", "first", "one",
|
||||
// Note: Removed "general" and "premier" as they are meaningful in brand names
|
||||
// like "Legal & General", "General Electric", "Premier Inn"
|
||||
|
||||
// Quality/Marketing terms
|
||||
"new", "modern", "advanced", "innovative", "premier", "elite", "premium",
|
||||
|
||||
@@ -120,6 +120,15 @@ public sealed class TimelineAnalyserService : ITimelineAnalyserService
|
||||
var earlier = sortedEmployment[i];
|
||||
var later = sortedEmployment[j];
|
||||
|
||||
// Skip overlaps at the same company (internal promotions/transfers)
|
||||
if (IsSameCompany(earlier.CompanyName, later.CompanyName))
|
||||
{
|
||||
_logger.LogDebug(
|
||||
"Ignoring overlap at same company: {Company1} -> {Company2}",
|
||||
earlier.CompanyName, later.CompanyName);
|
||||
continue;
|
||||
}
|
||||
|
||||
var overlap = CalculateOverlap(earlier, later);
|
||||
|
||||
if (overlap is not null && overlap.Value.Months > AllowedOverlapMonths)
|
||||
@@ -143,6 +152,59 @@ public sealed class TimelineAnalyserService : ITimelineAnalyserService
|
||||
return overlaps;
|
||||
}
|
||||
|
||||
/// <summary>
/// Determines whether two company names refer to the same company.
/// Handles variations like "BMW" vs "BMW UK" vs "BMW Group".
/// </summary>
/// <remarks>
/// Both names are normalised with <c>NormalizeCompanyName</c>. They are the same company when
/// the normalised names are equal ignoring case, or when both are at least three characters
/// long and one is a whole-word prefix of the other ("BMW" / "BMW Financial Services").
/// A bare character prefix is not enough: "Sky" and "Skyscanner" are different companies.
/// </remarks>
/// <param name="company1">First company name; null or blank yields <c>false</c>.</param>
/// <param name="company2">Second company name; null or blank yields <c>false</c>.</param>
/// <returns><c>true</c> if the names are considered the same company.</returns>
private static bool IsSameCompany(string? company1, string? company2)
{
    if (string.IsNullOrWhiteSpace(company1) || string.IsNullOrWhiteSpace(company2))
    {
        return false;
    }

    var name1 = NormalizeCompanyName(company1);
    var name2 = NormalizeCompanyName(company2);

    // Exact match after normalization.
    if (name1.Equals(name2, StringComparison.OrdinalIgnoreCase))
    {
        return true;
    }

    // Very short names are too ambiguous for prefix matching.
    if (name1.Length < 3 || name2.Length < 3)
    {
        return false;
    }

    return IsWholeWordPrefix(name1, name2) || IsWholeWordPrefix(name2, name1);

    // True when `prefix` starts `full` and the next character in `full` is not a letter
    // or digit, i.e. the prefix ends on a word boundary.
    static bool IsWholeWordPrefix(string prefix, string full) =>
        full.Length > prefix.Length
        && full.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)
        && !char.IsLetterOrDigit(full[prefix.Length]);
}
|
||||
|
||||
/// <summary>
/// Trims a company name and strips trailing legal-form / regional suffixes
/// (e.g. "Ltd", "PLC", "UK", "Group") so that name variants compare equal.
/// </summary>
/// <param name="name">The raw company name; must not be null.</param>
/// <returns>
/// The trimmed name with every trailing suffix removed. A suffix is only
/// removed when it is preceded by a space, so a name that consists solely of
/// a suffix (e.g. "Group") is returned unchanged.
/// </returns>
private static string NormalizeCompanyName(string name)
{
    // Matching is case-insensitive, so one casing per suffix is sufficient.
    string[] suffixes = ["Ltd", "Ltd.", "Limited", "PLC", "Inc", "Inc.",
        "Corporation", "Corp", "Corp.", "UK", "Group", "(UK)", "& Co", "& Co."];

    var normalized = name.Trim();

    // Repeat until a full pass strips nothing: a single ordered pass misses
    // stacked suffixes whose order differs from the list, e.g. "BMW UK Group"
    // or "BMW Ltd UK". Each strip shortens the string, so this terminates.
    bool strippedAny;
    do
    {
        strippedAny = false;

        foreach (var suffix in suffixes)
        {
            if (normalized.EndsWith(" " + suffix, StringComparison.OrdinalIgnoreCase))
            {
                // Drop the suffix together with its separating space.
                normalized = normalized[..^(suffix.Length + 1)].Trim();
                strippedAny = true;
            }
        }
    }
    while (strippedAny);

    return normalized;
}
|
||||
|
||||
private static (DateOnly Start, DateOnly End, int Months)? CalculateOverlap(
|
||||
EmploymentEntry earlier,
|
||||
EmploymentEntry later)
|
||||
|
||||
13
src/RealCV.Web/Components/Layout/AuthLayout.razor
Normal file
13
src/RealCV.Web/Components/Layout/AuthLayout.razor
Normal file
@@ -0,0 +1,13 @@
|
||||
@* Bare layout used by the account login/register pages: renders only the page body. *@
@inherits LayoutComponentBase

<div class="d-flex flex-column min-vh-100">
    <main class="flex-grow-1">
        @Body
    </main>
</div>

@* Error banner, hidden by default; presumably revealed by the Blazor runtime on an unhandled error. *@
<div id="blazor-error-ui" class="alert alert-danger fixed-bottom m-3" style="display: none;">
    An unhandled error has occurred.
    <a href="" class="alert-link reload">Reload</a>
    <button type="button" class="btn-close float-end dismiss" aria-label="Close"></button>
</div>
|
||||
@@ -1,6 +1,6 @@
|
||||
@page "/account/login"
|
||||
@using RealCV.Web.Components.Layout
|
||||
@layout MainLayout
|
||||
@layout AuthLayout
|
||||
|
||||
@using Microsoft.AspNetCore.Identity
|
||||
@using RealCV.Infrastructure.Identity
|
||||
@@ -14,9 +14,9 @@
|
||||
<!-- Left side - Form -->
|
||||
<div class="auth-form-side">
|
||||
<div class="auth-form-wrapper">
|
||||
<div class="text-center mb-4">
|
||||
<div class="text-center mb-5">
|
||||
<a href="/">
|
||||
<img src="images/RealCV_Logo_Transparent.png" alt="RealCV" class="auth-logo" />
|
||||
<img src="images/RealCV_Logo_Transparent.png" alt="RealCV" class="auth-logo" style="height: 60px;" />
|
||||
</a>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
@page "/account/register"
|
||||
@using RealCV.Web.Components.Layout
|
||||
@layout MainLayout
|
||||
@layout AuthLayout
|
||||
@rendermode InteractiveServer
|
||||
|
||||
@using Microsoft.AspNetCore.Identity
|
||||
@@ -16,9 +16,9 @@
|
||||
<!-- Left side - Form -->
|
||||
<div class="auth-form-side">
|
||||
<div class="auth-form-wrapper">
|
||||
<div class="text-center mb-4">
|
||||
<div class="text-center mb-5">
|
||||
<a href="/">
|
||||
<img src="images/RealCV_Logo_Transparent.png" alt="RealCV" class="auth-logo" />
|
||||
<img src="images/RealCV_Logo_Transparent.png" alt="RealCV" class="auth-logo" style="height: 60px;" />
|
||||
</a>
|
||||
</div>
|
||||
|
||||
|
||||
@@ -151,23 +151,23 @@
|
||||
{
|
||||
<div class="file-list-item">
|
||||
<div class="d-flex align-items-center">
|
||||
<div class="file-type-icon me-3 @(file.Name.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase) ? "pdf" : "docx")">
|
||||
<div class="file-type-icon me-2 @(file.Name.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase) ? "pdf" : "docx")">
|
||||
@if (file.Name.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="18" height="18" fill="currentColor" viewBox="0 0 16 16">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" viewBox="0 0 16 16">
|
||||
<path d="M14 14V4.5L9.5 0H4a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h8a2 2 0 0 0 2-2zM9.5 3A1.5 1.5 0 0 0 11 4.5h2V14a1 1 0 0 1-1 1H4a1 1 0 0 1-1-1V2a1 1 0 0 1 1-1h5.5v2z"/>
|
||||
</svg>
|
||||
}
|
||||
else
|
||||
{
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="18" height="18" fill="currentColor" viewBox="0 0 16 16">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" viewBox="0 0 16 16">
|
||||
<path d="M14 14V4.5L9.5 0H4a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h8a2 2 0 0 0 2-2zM9.5 3A1.5 1.5 0 0 0 11 4.5h2V14a1 1 0 0 1-1 1H4a1 1 0 0 1-1-1V2a1 1 0 0 1 1-1h5.5v2z"/>
|
||||
</svg>
|
||||
}
|
||||
</div>
|
||||
<div class="flex-grow-1">
|
||||
<p class="mb-0 fw-medium">@file.Name</p>
|
||||
<small class="text-muted">@FormatFileSize(file.Size)</small>
|
||||
<div class="flex-grow-1 min-width-0">
|
||||
<span class="file-name">@file.Name</span>
|
||||
<span class="file-size">@FormatFileSize(file.Size)</span>
|
||||
</div>
|
||||
</div>
|
||||
<button class="btn btn-sm btn-outline-danger" @onclick="() => RemoveFile(file)">
|
||||
@@ -267,39 +267,71 @@
|
||||
user-select: none;
|
||||
}
|
||||
|
||||
.file-list {
|
||||
border: 1px solid var(--realcv-gray-200);
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.file-list-item {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
border: 1px solid var(--realcv-gray-200);
|
||||
border-radius: 12px;
|
||||
padding: 1rem;
|
||||
margin-bottom: 0.75rem;
|
||||
padding: 0.5rem 0.75rem;
|
||||
background: var(--realcv-bg-surface);
|
||||
transition: all 0.2s ease;
|
||||
border-bottom: 1px solid var(--realcv-gray-200);
|
||||
transition: background 0.15s ease;
|
||||
}
|
||||
|
||||
.file-list-item:last-child {
|
||||
border-bottom: none;
|
||||
}
|
||||
|
||||
.file-list-item:hover {
|
||||
border-color: var(--realcv-primary);
|
||||
box-shadow: 0 4px 12px rgba(59, 111, 212, 0.08);
|
||||
background: var(--realcv-bg-muted);
|
||||
}
|
||||
|
||||
.file-list-item:nth-child(even) {
|
||||
background: rgba(0, 0, 0, 0.015);
|
||||
}
|
||||
|
||||
.file-list-item:nth-child(even):hover {
|
||||
background: var(--realcv-bg-muted);
|
||||
}
|
||||
|
||||
.file-name {
|
||||
font-size: 0.875rem;
|
||||
font-weight: 500;
|
||||
color: var(--realcv-text-primary);
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.file-size {
|
||||
font-size: 0.75rem;
|
||||
color: var(--realcv-gray-500);
|
||||
margin-left: 0.5rem;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.file-type-icon {
|
||||
width: 40px;
|
||||
height: 40px;
|
||||
border-radius: 10px;
|
||||
width: 28px;
|
||||
height: 28px;
|
||||
border-radius: 4px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.file-type-icon.pdf {
|
||||
background: linear-gradient(135deg, #fde8e8 0%, #fcd9d9 100%);
|
||||
background: #fef2f2;
|
||||
color: #dc2626;
|
||||
}
|
||||
|
||||
.file-type-icon.docx {
|
||||
background: linear-gradient(135deg, #e3ecf7 0%, #d4e4f4 100%);
|
||||
background: #eff6ff;
|
||||
color: var(--realcv-primary);
|
||||
}
|
||||
|
||||
|
||||
@@ -118,10 +118,10 @@
|
||||
else
|
||||
{
|
||||
<!-- Stats Cards -->
|
||||
<div class="row mb-4 g-4">
|
||||
<div class="row mb-3 g-3">
|
||||
<div class="col-md-4">
|
||||
<div class="card border-0 shadow-sm stat-card h-100">
|
||||
<div class="card-body p-4">
|
||||
<div class="card-body p-3">
|
||||
<div class="d-flex align-items-center">
|
||||
<div class="stat-icon stat-icon-primary me-3">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="28" height="28" fill="currentColor" viewBox="0 0 16 16">
|
||||
@@ -139,7 +139,7 @@
|
||||
</div>
|
||||
<div class="col-md-4">
|
||||
<div class="card border-0 shadow-sm stat-card h-100">
|
||||
<div class="card-body p-4">
|
||||
<div class="card-body p-3">
|
||||
<div class="d-flex align-items-center">
|
||||
<div class="stat-icon stat-icon-success me-3">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="28" height="28" fill="currentColor" viewBox="0 0 16 16">
|
||||
@@ -157,7 +157,7 @@
|
||||
</div>
|
||||
<div class="col-md-4">
|
||||
<div class="card border-0 shadow-sm stat-card h-100">
|
||||
<div class="card-body p-4">
|
||||
<div class="card-body p-3">
|
||||
<div class="d-flex align-items-center">
|
||||
<div class="stat-icon stat-icon-warning me-3">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="28" height="28" fill="currentColor" viewBox="0 0 16 16">
|
||||
@@ -176,7 +176,7 @@
|
||||
|
||||
<!-- Checks List -->
|
||||
<div class="card border-0 shadow-sm">
|
||||
<div class="card-header py-3 border-bottom" style="background-color: var(--realcv-bg-surface);">
|
||||
<div class="card-header py-2 px-3 border-bottom" style="background-color: var(--realcv-bg-surface);">
|
||||
<div class="d-flex justify-content-between align-items-center">
|
||||
<div class="d-flex align-items-center gap-3">
|
||||
<h5 class="mb-0 fw-bold">Recent CV Checks</h5>
|
||||
@@ -203,17 +203,17 @@
|
||||
<table class="table table-hover align-middle mb-0">
|
||||
<thead>
|
||||
<tr style="background-color: var(--realcv-bg-muted);">
|
||||
<th class="border-0 ps-3 py-3" style="width: 40px;">
|
||||
<th class="border-0 ps-3 py-2" style="width: 40px;">
|
||||
<input type="checkbox" class="form-check-input"
|
||||
checked="@IsAllSelected()"
|
||||
@onchange="ToggleSelectAll"
|
||||
title="Select all" />
|
||||
</th>
|
||||
<th class="border-0 py-3 text-uppercase small fw-semibold text-muted" style="letter-spacing: 0.05em;">Candidate</th>
|
||||
<th class="border-0 py-3 text-uppercase small fw-semibold text-muted" style="letter-spacing: 0.05em;">Uploaded</th>
|
||||
<th class="border-0 py-3 text-uppercase small fw-semibold text-muted text-center" style="letter-spacing: 0.05em;">Status</th>
|
||||
<th class="border-0 py-3 text-uppercase small fw-semibold text-muted text-center" style="letter-spacing: 0.05em;">Score</th>
|
||||
<th class="border-0 py-3 pe-4 text-uppercase small fw-semibold text-muted text-end" style="letter-spacing: 0.05em;">Actions</th>
|
||||
<th class="border-0 py-2 text-uppercase small fw-semibold text-muted" style="letter-spacing: 0.05em;">Candidate</th>
|
||||
<th class="border-0 py-2 text-uppercase small fw-semibold text-muted" style="letter-spacing: 0.05em;">Uploaded</th>
|
||||
<th class="border-0 py-2 text-uppercase small fw-semibold text-muted text-center" style="letter-spacing: 0.05em;">Status</th>
|
||||
<th class="border-0 py-2 text-uppercase small fw-semibold text-muted text-center" style="letter-spacing: 0.05em;">Score</th>
|
||||
<th class="border-0 py-2 pe-4 text-uppercase small fw-semibold text-muted text-end" style="letter-spacing: 0.05em;">Actions</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
@@ -221,15 +221,15 @@
|
||||
{
|
||||
<tr class="@(check.Status == "Completed" ? "cursor-pointer" : "") @(_selectedIds.Contains(check.Id) ? "table-active" : "")"
|
||||
@onclick="() => ViewReport(check)">
|
||||
<td class="ps-3 py-3" @onclick:stopPropagation="true">
|
||||
<td class="ps-3 py-2" @onclick:stopPropagation="true">
|
||||
<input type="checkbox" class="form-check-input"
|
||||
checked="@_selectedIds.Contains(check.Id)"
|
||||
@onchange="() => ToggleSelection(check.Id)" />
|
||||
</td>
|
||||
<td class="py-3">
|
||||
<td class="py-2">
|
||||
<div class="d-flex align-items-center">
|
||||
<div class="file-icon-wrapper me-3">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" fill="currentColor" class="bi bi-file-earmark-person text-primary" viewBox="0 0 16 16">
|
||||
<div class="file-icon-wrapper me-2">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" class="bi bi-file-earmark-person text-primary" viewBox="0 0 16 16">
|
||||
<path d="M11 8a3 3 0 1 1-6 0 3 3 0 0 1 6 0z"/>
|
||||
<path d="M14 14V4.5L9.5 0H4a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h8a2 2 0 0 0 2-2zM9.5 3A1.5 1.5 0 0 0 11 4.5h2v9.255S12 12 8 12s-5 1.755-5 1.755V2a1 1 0 0 1 1-1h5.5v2z"/>
|
||||
</svg>
|
||||
@@ -240,17 +240,17 @@
|
||||
</div>
|
||||
</div>
|
||||
</td>
|
||||
<td class="py-3">
|
||||
<td class="py-2">
|
||||
<div>
|
||||
<p class="mb-0 small">@check.CreatedAt.ToString("dd MMM yyyy")</p>
|
||||
<small class="text-muted">@check.CreatedAt.ToString("HH:mm")</small>
|
||||
</div>
|
||||
</td>
|
||||
<td class="py-3 text-center">
|
||||
<td class="py-2 text-center">
|
||||
@switch (check.Status)
|
||||
{
|
||||
case "Completed":
|
||||
<span class="badge rounded-pill bg-success-subtle text-success px-3 py-2">
|
||||
<span class="badge rounded-pill bg-success-subtle text-success px-2 py-1">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" fill="currentColor" class="bi bi-check-circle-fill me-1" viewBox="0 0 16 16">
|
||||
<path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zm-3.97-3.03a.75.75 0 0 0-1.08.022L7.477 9.417 5.384 7.323a.75.75 0 0 0-1.06 1.06L6.97 11.03a.75.75 0 0 0 1.079-.02l3.992-4.99a.75.75 0 0 0-.01-1.05z"/>
|
||||
</svg>
|
||||
@@ -258,13 +258,13 @@
|
||||
</span>
|
||||
break;
|
||||
case "Processing":
|
||||
<span class="badge rounded-pill bg-primary-subtle text-primary px-3 py-2">
|
||||
<span class="badge rounded-pill bg-primary-subtle text-primary px-2 py-1">
|
||||
<span class="spinner-border spinner-border-sm me-1" role="status" style="width: 0.75rem; height: 0.75rem;"></span>
|
||||
@(check.ProcessingStage ?? "Processing")
|
||||
</span>
|
||||
break;
|
||||
case "Pending":
|
||||
<span class="badge rounded-pill bg-secondary-subtle text-secondary px-3 py-2">
|
||||
<span class="badge rounded-pill bg-secondary-subtle text-secondary px-2 py-1">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" fill="currentColor" class="bi bi-clock me-1" viewBox="0 0 16 16">
|
||||
<path d="M8 3.5a.5.5 0 0 0-1 0V9a.5.5 0 0 0 .252.434l3.5 2a.5.5 0 0 0 .496-.868L8 8.71V3.5z"/>
|
||||
<path d="M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16zm7-8A7 7 0 1 1 1 8a7 7 0 0 1 14 0z"/>
|
||||
@@ -273,7 +273,7 @@
|
||||
</span>
|
||||
break;
|
||||
case "Failed":
|
||||
<span class="badge rounded-pill bg-danger-subtle text-danger px-3 py-2">
|
||||
<span class="badge rounded-pill bg-danger-subtle text-danger px-2 py-1">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" fill="currentColor" class="bi bi-x-circle-fill me-1" viewBox="0 0 16 16">
|
||||
<path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zM5.354 4.646a.5.5 0 1 0-.708.708L7.293 8l-2.647 2.646a.5.5 0 0 0 .708.708L8 8.707l2.646 2.647a.5.5 0 0 0 .708-.708L8.707 8l2.647-2.646a.5.5 0 0 0-.708-.708L8 7.293 5.354 4.646z"/>
|
||||
</svg>
|
||||
@@ -281,11 +281,11 @@
|
||||
</span>
|
||||
break;
|
||||
default:
|
||||
<span class="badge rounded-pill bg-secondary-subtle text-secondary px-3 py-2">@check.Status</span>
|
||||
<span class="badge rounded-pill bg-secondary-subtle text-secondary px-2 py-1">@check.Status</span>
|
||||
break;
|
||||
}
|
||||
</td>
|
||||
<td class="py-3 text-center">
|
||||
<td class="py-2 text-center">
|
||||
@if (check.VeracityScore.HasValue)
|
||||
{
|
||||
<div class="score-ring-container" title="Veracity Score: @check.VeracityScore%">
|
||||
@@ -303,7 +303,7 @@
|
||||
<span class="text-muted">--</span>
|
||||
}
|
||||
</td>
|
||||
<td class="py-3 pe-4 text-end">
|
||||
<td class="py-2 pe-4 text-end">
|
||||
<div class="d-flex justify-content-end align-items-center gap-2">
|
||||
@if (check.Status == "Completed")
|
||||
{
|
||||
@@ -424,10 +424,10 @@
|
||||
}
|
||||
|
||||
.file-icon-wrapper {
|
||||
width: 44px;
|
||||
height: 44px;
|
||||
width: 36px;
|
||||
height: 36px;
|
||||
background: linear-gradient(135deg, #e8f1fa 0%, #d4e4f4 100%);
|
||||
border-radius: 10px;
|
||||
border-radius: 8px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
@@ -435,8 +435,8 @@
|
||||
|
||||
.score-ring-container {
|
||||
position: relative;
|
||||
width: 52px;
|
||||
height: 52px;
|
||||
width: 44px;
|
||||
height: 44px;
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
|
||||
@@ -74,7 +74,7 @@
|
||||
<path d="M7.002 12a1 1 0 1 1 2 0 1 1 0 0 1-2 0zM7.1 5.995a.905.905 0 1 1 1.8 0l-.35 3.507a.552.552 0 0 1-1.1 0L7.1 5.995z"/>
|
||||
</svg>
|
||||
<h4 class="mb-2">Processing Failed</h4>
|
||||
<p class="text-muted">We encountered an error processing your CV. Please try uploading again.</p>
|
||||
<p class="text-muted">@(!string.IsNullOrEmpty(_check.ProcessingStage) ? _check.ProcessingStage : "We encountered an error processing your CV. Please try uploading again.")</p>
|
||||
}
|
||||
|
||||
<p class="text-muted small mt-4">
|
||||
|
||||
@@ -1051,8 +1051,12 @@ h1:focus {
|
||||
}
|
||||
|
||||
.auth-logo {
|
||||
height: 48px;
|
||||
margin-bottom: 1rem;
|
||||
height: 60px;
|
||||
transition: opacity var(--realcv-transition);
|
||||
}
|
||||
|
||||
.auth-logo:hover {
|
||||
opacity: 0.85;
|
||||
}
|
||||
|
||||
.auth-title {
|
||||
|
||||
@@ -53,22 +53,12 @@ public class CVBatchTester
|
||||
options.UseSqlServer(connectionString));
|
||||
|
||||
// Companies House
|
||||
services.Configure<CompaniesHouseSettings>(options =>
|
||||
{
|
||||
options.BaseUrl = configuration["CompaniesHouse:BaseUrl"] ?? "https://api.company-information.service.gov.uk";
|
||||
options.ApiKey = configuration["CompaniesHouse:ApiKey"] ?? "";
|
||||
});
|
||||
|
||||
services.Configure<CompaniesHouseSettings>(configuration.GetSection("CompaniesHouse"));
|
||||
services.AddHttpClient<CompaniesHouseClient>();
|
||||
|
||||
// Anthropic (for AI matching)
|
||||
services.Configure<AnthropicSettings>(options =>
|
||||
{
|
||||
options.ApiKey = configuration["Anthropic:ApiKey"] ?? "";
|
||||
});
|
||||
|
||||
services.AddHttpClient<AnthropicClient>();
|
||||
services.AddScoped<ICompanyNameMatcherService, CompanyNameMatcherService>();
|
||||
services.Configure<AnthropicSettings>(configuration.GetSection("Anthropic"));
|
||||
services.AddScoped<ICompanyNameMatcherService, AICompanyNameMatcherService>();
|
||||
|
||||
// Services
|
||||
services.AddScoped<ICompanyVerifierService, CompanyVerifierService>();
|
||||
@@ -142,7 +132,7 @@ public class CVBatchTester
|
||||
var summary = new CVVerificationSummary
|
||||
{
|
||||
FileName = Path.GetFileName(filePath),
|
||||
CandidateName = parsedCV.PersonalInfo?.FullName ?? "Unknown"
|
||||
CandidateName = parsedCV.FullName ?? "Unknown"
|
||||
};
|
||||
|
||||
// Verify employers
|
||||
|
||||
@@ -76,8 +76,9 @@ public class CompanyVerifierServiceTests : IDisposable
|
||||
_mockAiMatcher.Setup(m => m.FindBestMatchAsync(
|
||||
It.IsAny<string>(),
|
||||
It.IsAny<List<CompanyCandidate>>(),
|
||||
It.IsAny<string?>(),
|
||||
It.IsAny<CancellationToken>()))
|
||||
.Returns((string cvCompanyName, List<CompanyCandidate> candidates, CancellationToken _) =>
|
||||
.Returns((string cvCompanyName, List<CompanyCandidate> candidates, string? industryHint, CancellationToken _) =>
|
||||
{
|
||||
// Find exact or close match in candidates
|
||||
var exactMatch = candidates.FirstOrDefault(c =>
|
||||
|
||||
179
tests/RealCV.Tests/Services/CompoundNameDetectionTests.cs
Normal file
179
tests/RealCV.Tests/Services/CompoundNameDetectionTests.cs
Normal file
@@ -0,0 +1,179 @@
|
||||
using FluentAssertions;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using RealCV.Infrastructure.Configuration;
|
||||
using RealCV.Infrastructure.Services;
|
||||
|
||||
namespace RealCV.Tests.Services;
|
||||
|
||||
/// <summary>
/// Verifies when <c>ExtractCompanyNamesAsync</c> splits a CV company string into
/// several company names and when it leaves it alone (returns <c>null</c>).
/// </summary>
public sealed class CompoundNameDetectionTests
{
    // Service under test, built with a placeholder API key and a no-op logger.
    private readonly AICompanyNameMatcherService _matcher = new(
        Options.Create(new AnthropicSettings { ApiKey = "test-key" }),
        NullLogger<AICompanyNameMatcherService>.Instance);

    // ---- Known single companies (must NOT be split) ----

    [Theory]
    [InlineData("Ernst & Young")]
    [InlineData("Ernst and Young")]
    [InlineData("Marks & Spencer")]
    [InlineData("Marks and Spencer")]
    [InlineData("Procter & Gamble")]
    [InlineData("Johnson & Johnson")]
    [InlineData("Deloitte and Touche")]
    [InlineData("Allen & Overy")]
    [InlineData("Slaughter and May")]
    [InlineData("Holland & Barrett")]
    [InlineData("Smith & Nephew")]
    [InlineData("AT&T")]
    [InlineData("M&S")]
    public Task ExtractCompanyNamesAsync_KnownSingleCompany_ReturnsNull(string companyName) =>
        AssertNotSplitAsync(
            companyName,
            $"'{companyName}' is a known single company and should not be split");

    [Theory]
    [InlineData("Ernst & Young LLP")]
    [InlineData("Marks & Spencer PLC")]
    [InlineData("Procter & Gamble UK")]
    [InlineData("Johnson & Johnson Medical")]
    public Task ExtractCompanyNamesAsync_KnownSingleCompanyWithSuffix_ReturnsNull(string companyName) =>
        AssertNotSplitAsync(
            companyName,
            $"'{companyName}' contains a known single company and should not be split");

    // ---- Department / division patterns (must NOT be split) ----

    [Theory]
    [InlineData("Tesco Stores and Distribution")]
    [InlineData("BMW UK and Ireland")]
    [InlineData("Google Europe and Middle East")]
    [InlineData("Sales and Marketing")]
    [InlineData("Research and Development")]
    [InlineData("Finance and Operations")]
    public Task ExtractCompanyNamesAsync_DepartmentPattern_ReturnsNull(string companyName) =>
        AssertNotSplitAsync(
            companyName,
            $"'{companyName}' looks like departments/divisions and should not be split");

    // ---- Slash-separated names (MUST be split) ----

    [Theory]
    [InlineData("ASDA/WALMART", new[] { "ASDA", "WALMART" })]
    [InlineData("BBC/ITV", new[] { "BBC", "ITV" })]
    [InlineData("Tesco/Sainsbury's", new[] { "Tesco", "Sainsbury's" })]
    [InlineData("Microsoft/Google", new[] { "Microsoft", "Google" })]
    public Task ExtractCompanyNamesAsync_SlashSeparated_ReturnsParts(string companyName, string[] expectedParts) =>
        AssertSplitIntoAsync(
            companyName,
            expectedParts,
            $"'{companyName}' contains '/' and should be split");

    // ---- Names joined by "and" / "&" ----

    // Both halves carry an explicit company suffix (Ltd, PLC, ...), so a split is expected.
    [Theory]
    [InlineData("Acme Ltd & Beta Ltd", new[] { "Acme Ltd", "Beta Ltd" })]
    public Task ExtractCompanyNamesAsync_BothPartsHaveCompanySuffix_ReturnsParts(string companyName, string[] expectedParts) =>
        AssertSplitIntoAsync(
            companyName,
            expectedParts,
            $"'{companyName}' has company suffixes on both parts");

    // The rule-based detection stays conservative when "&" / "and" is ambiguous.
    [Theory]
    [InlineData("Corus & Laura Ashley Hotels")] // Ambiguous - neither has company suffix
    [InlineData("Smith & Jones Consulting")]    // Could be a single partnership
    [InlineData("Acme PLC and Beta PLC")]       // Matches " plc and " department pattern
    public Task ExtractCompanyNamesAsync_AmbiguousWithAnd_ReturnsNull(string companyName) =>
        AssertNotSplitAsync(
            companyName,
            $"'{companyName}' is ambiguous and should not be split");

    // ---- Edge cases ----

    [Theory]
    [InlineData("")]
    [InlineData(" ")]
    [InlineData(null)]
    public Task ExtractCompanyNamesAsync_EmptyOrNull_ReturnsNull(string? companyName) =>
        AssertNotSplitAsync(companyName!);

    [Theory]
    [InlineData("Microsoft")]
    [InlineData("Google")]
    [InlineData("Amazon")]
    [InlineData("Apple Inc")]
    [InlineData("Tesco PLC")]
    public Task ExtractCompanyNamesAsync_SimpleCompanyName_ReturnsNull(string companyName) =>
        AssertNotSplitAsync(
            companyName,
            $"'{companyName}' is a simple company name and should not be split");

    // Each side of the "&" is a single character - too short to be a company name.
    [Fact]
    public Task ExtractCompanyNamesAsync_ShortParts_ReturnsNull() =>
        AssertNotSplitAsync("A & B", "parts are too short to be valid company names");

    /// <summary>
    /// Runs the extraction and asserts that no split was produced (null result).
    /// </summary>
    private async Task AssertNotSplitAsync(string companyName, string because = "")
    {
        var extracted = await _matcher.ExtractCompanyNamesAsync(companyName);

        extracted.Should().BeNull(because);
    }

    /// <summary>
    /// Runs the extraction and asserts that it produced exactly the expected parts.
    /// </summary>
    private async Task AssertSplitIntoAsync(string companyName, string[] expectedParts, string because)
    {
        var extracted = await _matcher.ExtractCompanyNamesAsync(companyName);

        extracted.Should().NotBeNull(because);
        extracted.Should().BeEquivalentTo(expectedParts);
    }
}
|
||||
@@ -51,7 +51,7 @@ public sealed class EducationVerifierServiceTests
|
||||
var result = _sut.Verify(education);
|
||||
|
||||
// Assert
|
||||
result.VerificationNotes.Should().Contain("diploma mill blacklist");
|
||||
result.VerificationNotes.Should().Contain("not found in accredited institutions");
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
@@ -12,23 +13,86 @@ using RealCV.Infrastructure.Services;
|
||||
|
||||
namespace CVBatchTester;
|
||||
|
||||
// DTOs for test JSON format (snake_case with nested personal object)
|
||||
/// <summary>
/// Root object of a JSON test CV. Property names map to snake_case JSON keys
/// (e.g. <c>cv_id</c>, <c>expected_flags</c>) under the serializer's naming policy.
/// </summary>
internal record TestCVData
{
    /// <summary>Identifier of the test CV (JSON key <c>cv_id</c>).</summary>
    public string? CvId { get; init; }

    /// <summary>Category label attached to the test CV.</summary>
    public string? Category { get; init; }

    /// <summary>Flags listed in the test file (JSON key <c>expected_flags</c>).</summary>
    public List<string>? ExpectedFlags { get; init; }

    /// <summary>Nested personal details object.</summary>
    public TestPersonalData? Personal { get; init; }

    /// <summary>Free-text profile section.</summary>
    public string? Profile { get; init; }

    /// <summary>Employment history entries.</summary>
    public List<TestEmploymentEntry>? Employment { get; init; }

    /// <summary>Education history entries.</summary>
    public List<TestEducationEntry>? Education { get; init; }

    /// <summary>Listed skills.</summary>
    public List<string>? Skills { get; init; }
}
|
||||
|
||||
/// <summary>
/// Personal details section of a JSON test CV (the nested <c>personal</c> object).
/// </summary>
internal record TestPersonalData
{
    /// <summary>Candidate name.</summary>
    public string? Name { get; init; }

    /// <summary>Contact e-mail address.</summary>
    public string? Email { get; init; }

    /// <summary>Contact phone number.</summary>
    public string? Phone { get; init; }

    /// <summary>Postal address.</summary>
    public string? Address { get; init; }

    /// <summary>LinkedIn reference.</summary>
    // NOTE(review): under a snake_case naming policy this property binds to the
    // key "linked_in", not "linkedin" - confirm against the test data files.
    public string? LinkedIn { get; init; }
}
|
||||
|
||||
/// <summary>
/// One employment entry of a JSON test CV. Dates are kept as the raw strings
/// found in the file (JSON keys <c>start_date</c> / <c>end_date</c>).
/// </summary>
internal record TestEmploymentEntry
{
    /// <summary>Employer name as written in the test file.</summary>
    public string? Company { get; init; }

    /// <summary>Job title (JSON key <c>job_title</c>).</summary>
    public string? JobTitle { get; init; }

    /// <summary>Start date, unparsed.</summary>
    public string? StartDate { get; init; }

    /// <summary>End date, unparsed.</summary>
    public string? EndDate { get; init; }

    /// <summary>Work location.</summary>
    public string? Location { get; init; }

    /// <summary>Free-text role description.</summary>
    public string? Description { get; init; }

    /// <summary>Listed achievements for the role.</summary>
    public List<string>? Achievements { get; init; }
}
|
||||
|
||||
/// <summary>
/// One education entry of a JSON test CV. Dates are kept as the raw strings
/// found in the file (JSON keys <c>start_date</c> / <c>end_date</c>).
/// </summary>
internal record TestEducationEntry
{
    /// <summary>Institution name as written in the test file.</summary>
    public string? Institution { get; init; }

    /// <summary>Qualification obtained.</summary>
    public string? Qualification { get; init; }

    /// <summary>Subject studied.</summary>
    public string? Subject { get; init; }

    /// <summary>Classification of the award.</summary>
    public string? Classification { get; init; }

    /// <summary>Start date, unparsed.</summary>
    public string? StartDate { get; init; }

    /// <summary>End date, unparsed.</summary>
    public string? EndDate { get; init; }
}
|
||||
|
||||
class Program
|
||||
{
|
||||
private static StreamWriter? _logWriter;
|
||||
|
||||
private static readonly JsonSerializerOptions JsonOptions = new()
|
||||
{
|
||||
PropertyNameCaseInsensitive = true,
|
||||
PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
|
||||
Converters = { new JsonStringEnumConverter() }
|
||||
};
|
||||
|
||||
static async Task<int> Main(string[] args)
|
||||
{
|
||||
var folderPath = args.FirstOrDefault() ?? AskForFolder();
|
||||
|
||||
if (string.IsNullOrEmpty(folderPath) || !Directory.Exists(folderPath))
|
||||
{
|
||||
Console.WriteLine($"Error: Folder not found: {folderPath}");
|
||||
Console.WriteLine("Usage: CVBatchTester <folder-path>");
|
||||
Console.WriteLine(" e.g. CVBatchTester /home/user/cvs");
|
||||
Log($"Error: Folder not found: {folderPath}");
|
||||
Log("Usage: CVBatchTester <folder-path> [--output <file>]");
|
||||
Log(" e.g. CVBatchTester /home/user/cvs");
|
||||
Log(" e.g. CVBatchTester /home/user/cvs --output /tmp/results.log");
|
||||
return 1;
|
||||
}
|
||||
|
||||
Console.WriteLine($"CV Batch Verification Tester");
|
||||
Console.WriteLine($"Processing CVs from: {folderPath}");
|
||||
Console.WriteLine(new string('=', 80));
|
||||
// Check for --output flag
|
||||
var outputIndex = Array.IndexOf(args, "--output");
|
||||
var logPath = outputIndex >= 0 && outputIndex < args.Length - 1
|
||||
? args[outputIndex + 1]
|
||||
: Path.Combine(folderPath, $"batch-results-{DateTime.Now:yyyyMMdd-HHmmss}.log");
|
||||
|
||||
_logWriter = new StreamWriter(logPath, false) { AutoFlush = true };
|
||||
|
||||
Log($"CV Batch Verification Tester");
|
||||
Log($"Processing CVs from: {folderPath}");
|
||||
Log($"Output log: {logPath}");
|
||||
Log($"Started: {DateTime.Now:yyyy-MM-dd HH:mm:ss}");
|
||||
Log(new string('=', 80));
|
||||
|
||||
// Setup DI
|
||||
var services = new ServiceCollection();
|
||||
@@ -39,15 +103,16 @@ class Program
|
||||
var cvFiles = Directory.GetFiles(folderPath, "*.*", SearchOption.TopDirectoryOnly)
|
||||
.Where(f => f.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase) ||
|
||||
f.EndsWith(".docx", StringComparison.OrdinalIgnoreCase) ||
|
||||
f.EndsWith(".doc", StringComparison.OrdinalIgnoreCase))
|
||||
f.EndsWith(".doc", StringComparison.OrdinalIgnoreCase) ||
|
||||
f.EndsWith(".json", StringComparison.OrdinalIgnoreCase))
|
||||
.OrderBy(f => f)
|
||||
.ToList();
|
||||
|
||||
Console.WriteLine($"Found {cvFiles.Count} CV files\n");
|
||||
Log($"Found {cvFiles.Count} CV files\n");
|
||||
|
||||
if (cvFiles.Count == 0)
|
||||
{
|
||||
Console.WriteLine("No CV files found (.pdf, .docx, .doc)");
|
||||
Log("No CV files found (.pdf, .docx, .doc, .json)");
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -63,9 +128,9 @@ class Program
|
||||
|
||||
foreach (var cvFile in cvFiles)
|
||||
{
|
||||
Console.WriteLine($"\n{new string('=', 80)}");
|
||||
Console.WriteLine($"[{++processedCount}/{cvFiles.Count}] {Path.GetFileName(cvFile)}");
|
||||
Console.WriteLine(new string('=', 80));
|
||||
Log($"\n{new string('=', 80)}");
|
||||
Log($"[{++processedCount}/{cvFiles.Count}] {Path.GetFileName(cvFile)}");
|
||||
Log(new string('=', 80));
|
||||
|
||||
try
|
||||
{
|
||||
@@ -74,17 +139,30 @@ class Program
|
||||
var companyVerifier = scope.ServiceProvider.GetRequiredService<ICompanyVerifierService>();
|
||||
var eduVerifier = scope.ServiceProvider.GetRequiredService<IEducationVerifierService>();
|
||||
|
||||
// Parse CV
|
||||
await using var stream = File.OpenRead(cvFile);
|
||||
var cv = await parser.ParseAsync(stream, Path.GetFileName(cvFile));
|
||||
// Parse CV - handle JSON files differently
|
||||
CVData cv;
|
||||
if (cvFile.EndsWith(".json", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
var jsonContent = await File.ReadAllTextAsync(cvFile);
|
||||
var testCv = JsonSerializer.Deserialize<TestCVData>(jsonContent, JsonOptions)
|
||||
?? throw new InvalidOperationException("Failed to deserialize JSON CV");
|
||||
|
||||
Console.WriteLine($"Candidate: {cv.FullName}");
|
||||
// Convert TestCVData to CVData
|
||||
cv = ConvertTestCVData(testCv);
|
||||
Log($"Loaded JSON CV: {cv.FullName}");
|
||||
}
|
||||
else
|
||||
{
|
||||
await using var stream = File.OpenRead(cvFile);
|
||||
cv = await parser.ParseAsync(stream, Path.GetFileName(cvFile));
|
||||
Log($"Parsed CV: {cv.FullName}");
|
||||
}
|
||||
|
||||
// Verify Employers
|
||||
if (cv.Employment?.Count > 0)
|
||||
{
|
||||
Console.WriteLine($"\nEMPLOYERS ({cv.Employment.Count}):");
|
||||
Console.WriteLine(new string('-', 60));
|
||||
Log($"\nEMPLOYERS ({cv.Employment.Count}):");
|
||||
Log(new string('-', 60));
|
||||
|
||||
foreach (var emp in cv.Employment)
|
||||
{
|
||||
@@ -100,18 +178,18 @@ class Program
|
||||
var icon = result.IsVerified ? "✓" : "✗";
|
||||
var period = FormatPeriod(emp.StartDate, emp.EndDate);
|
||||
|
||||
Console.WriteLine($"\n {icon} {emp.CompanyName}");
|
||||
Console.WriteLine($" Period: {period}");
|
||||
Console.WriteLine($" Role: {emp.JobTitle}");
|
||||
Log($"\n {icon} {emp.CompanyName}");
|
||||
Log($" Period: {period}");
|
||||
Log($" Role: {emp.JobTitle}");
|
||||
|
||||
if (result.IsVerified)
|
||||
{
|
||||
verifiedEmployers++;
|
||||
Console.WriteLine($" Match: {result.MatchedCompanyName} ({result.MatchScore}%)");
|
||||
Log($" Match: {result.MatchedCompanyName} ({result.MatchScore}%)");
|
||||
if (!string.IsNullOrEmpty(result.MatchedCompanyNumber))
|
||||
Console.WriteLine($" Company #: {result.MatchedCompanyNumber}");
|
||||
Log($" Company #: {result.MatchedCompanyNumber}");
|
||||
if (!string.IsNullOrEmpty(result.CompanyStatus))
|
||||
Console.WriteLine($" Status: {result.CompanyStatus}");
|
||||
Log($" Status: {result.CompanyStatus}");
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -119,12 +197,12 @@ class Program
|
||||
}
|
||||
|
||||
if (!string.IsNullOrEmpty(result.VerificationNotes))
|
||||
Console.WriteLine($" Note: {result.VerificationNotes}");
|
||||
Log($" Note: {result.VerificationNotes}");
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Console.WriteLine($"\n ✗ {emp.CompanyName}");
|
||||
Console.WriteLine($" ERROR: {ex.Message}");
|
||||
Log($"\n ✗ {emp.CompanyName}");
|
||||
Log($" ERROR: {ex.Message}");
|
||||
allUnverifiedEmployers.Add(emp.CompanyName);
|
||||
}
|
||||
}
|
||||
@@ -133,8 +211,8 @@ class Program
|
||||
// Verify Education
|
||||
if (cv.Education?.Count > 0)
|
||||
{
|
||||
Console.WriteLine($"\nEDUCATION ({cv.Education.Count}):");
|
||||
Console.WriteLine(new string('-', 60));
|
||||
Log($"\nEDUCATION ({cv.Education.Count}):");
|
||||
Log(new string('-', 60));
|
||||
|
||||
var eduEntries = cv.Education.Select(e => new EducationEntry
|
||||
{
|
||||
@@ -152,10 +230,10 @@ class Program
|
||||
totalEducation++;
|
||||
var icon = result.IsVerified ? "✓" : "✗";
|
||||
|
||||
Console.WriteLine($"\n {icon} {result.ClaimedInstitution}");
|
||||
Console.WriteLine($" Qualification: {result.ClaimedQualification}");
|
||||
Log($"\n {icon} {result.ClaimedInstitution}");
|
||||
Log($" Qualification: {result.ClaimedQualification}");
|
||||
if (!string.IsNullOrEmpty(result.ClaimedSubject))
|
||||
Console.WriteLine($" Subject: {result.ClaimedSubject}");
|
||||
Log($" Subject: {result.ClaimedSubject}");
|
||||
|
||||
if (result.IsVerified)
|
||||
{
|
||||
@@ -163,41 +241,41 @@ class Program
|
||||
if (result.MatchedInstitution != null &&
|
||||
!result.MatchedInstitution.Equals(result.ClaimedInstitution, StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
Console.WriteLine($" Match: {result.MatchedInstitution}");
|
||||
Log($" Match: {result.MatchedInstitution}");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
allUnverifiedInstitutions.Add(result.ClaimedInstitution ?? "Unknown");
|
||||
Console.WriteLine($" Status: {result.Status}");
|
||||
Log($" Status: {result.Status}");
|
||||
}
|
||||
|
||||
if (!string.IsNullOrEmpty(result.VerificationNotes))
|
||||
Console.WriteLine($" Note: {result.VerificationNotes}");
|
||||
Log($" Note: {result.VerificationNotes}");
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
errorCount++;
|
||||
Console.WriteLine($"ERROR processing file: {ex.Message}");
|
||||
Log($"ERROR processing file: {ex.Message}");
|
||||
}
|
||||
}
|
||||
|
||||
// Print Summary
|
||||
Console.WriteLine($"\n\n{new string('=', 80)}");
|
||||
Console.WriteLine("VERIFICATION SUMMARY");
|
||||
Console.WriteLine(new string('=', 80));
|
||||
Log($"\n\n{new string('=', 80)}");
|
||||
Log("VERIFICATION SUMMARY");
|
||||
Log(new string('=', 80));
|
||||
|
||||
Console.WriteLine($"\nCVs Processed: {processedCount - errorCount}/{cvFiles.Count}");
|
||||
Log($"\nCVs Processed: {processedCount - errorCount}/{cvFiles.Count}");
|
||||
if (errorCount > 0)
|
||||
Console.WriteLine($"Errors: {errorCount}");
|
||||
Log($"Errors: {errorCount}");
|
||||
|
||||
var empRate = totalEmployers > 0 ? verifiedEmployers * 100 / totalEmployers : 0;
|
||||
var eduRate = totalEducation > 0 ? verifiedEducation * 100 / totalEducation : 0;
|
||||
|
||||
Console.WriteLine($"\nEmployers: {verifiedEmployers}/{totalEmployers} verified ({empRate}%)");
|
||||
Console.WriteLine($"Education: {verifiedEducation}/{totalEducation} verified ({eduRate}%)");
|
||||
Log($"\nEmployers: {verifiedEmployers}/{totalEmployers} verified ({empRate}%)");
|
||||
Log($"Education: {verifiedEducation}/{totalEducation} verified ({eduRate}%)");
|
||||
|
||||
// List unverified employers
|
||||
var uniqueUnverifiedEmployers = allUnverifiedEmployers
|
||||
@@ -208,12 +286,12 @@ class Program
|
||||
|
||||
if (uniqueUnverifiedEmployers.Count > 0)
|
||||
{
|
||||
Console.WriteLine($"\n{new string('-', 60)}");
|
||||
Console.WriteLine($"UNVERIFIED EMPLOYERS ({uniqueUnverifiedEmployers.Count} unique):");
|
||||
Log($"\n{new string('-', 60)}");
|
||||
Log($"UNVERIFIED EMPLOYERS ({uniqueUnverifiedEmployers.Count} unique):");
|
||||
foreach (var group in uniqueUnverifiedEmployers)
|
||||
{
|
||||
var count = group.Count() > 1 ? $" (x{group.Count()})" : "";
|
||||
Console.WriteLine($" - {group.Key}{count}");
|
||||
Log($" - {group.Key}{count}");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -226,19 +304,30 @@ class Program
|
||||
|
||||
if (uniqueUnverifiedInstitutions.Count > 0)
|
||||
{
|
||||
Console.WriteLine($"\n{new string('-', 60)}");
|
||||
Console.WriteLine($"UNVERIFIED INSTITUTIONS ({uniqueUnverifiedInstitutions.Count} unique):");
|
||||
Log($"\n{new string('-', 60)}");
|
||||
Log($"UNVERIFIED INSTITUTIONS ({uniqueUnverifiedInstitutions.Count} unique):");
|
||||
foreach (var group in uniqueUnverifiedInstitutions)
|
||||
{
|
||||
var count = group.Count() > 1 ? $" (x{group.Count()})" : "";
|
||||
Console.WriteLine($" - {group.Key}{count}");
|
||||
Log($" - {group.Key}{count}");
|
||||
}
|
||||
}
|
||||
|
||||
Console.WriteLine($"\n{new string('=', 80)}");
|
||||
Log($"\nCompleted: {DateTime.Now:yyyy-MM-dd HH:mm:ss}");
|
||||
Log($"\n{new string('=', 80)}");
|
||||
|
||||
_logWriter?.Close();
|
||||
Console.WriteLine($"\nResults written to: {logPath}");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// <summary>
/// Writes a message to the console and, when a log writer is available, to that writer as well.
/// </summary>
/// <param name="message">The text to emit; written as a single line to each destination.</param>
static void Log(string message)
{
    // The console always receives the message, regardless of the writer's state.
    Console.WriteLine(message);

    // Mirror the line to the log writer only if one has been assigned.
    if (_logWriter is { } writer)
    {
        writer.WriteLine(message);
    }
}
|
||||
|
||||
static string AskForFolder()
|
||||
{
|
||||
Console.Write("Enter CV folder path: ");
|
||||
@@ -252,6 +341,57 @@ class Program
|
||||
return $"{startStr} - {endStr}";
|
||||
}
|
||||
|
||||
/// <summary>
/// Maps a <c>TestCVData</c> object (deserialized from a JSON test CV) onto the <c>CVData</c>
/// shape used by the rest of the verification flow.
/// </summary>
/// <param name="testCv">The deserialized test CV to convert.</param>
/// <returns>
/// A new <c>CVData</c>. Missing name, company, job title and institution values become
/// <c>"Unknown"</c>; missing employment, education and skills collections become empty lists.
/// </returns>
static CVData ConvertTestCVData(TestCVData testCv)
{
    var personal = testCv.Personal;

    // Each list stays null when the source collection is null; the fallback to an
    // empty list is applied when the result object is assembled below.
    var jobs = testCv.Employment?
        .Select(job => new EmploymentEntry
        {
            CompanyName = job.Company ?? "Unknown",
            JobTitle = job.JobTitle ?? "Unknown",
            Location = job.Location,
            StartDate = ParseDate(job.StartDate),
            EndDate = ParseDate(job.EndDate),
            // Only a null end-date string marks the role as current.
            IsCurrent = job.EndDate == null,
            Description = job.Description
        })
        .ToList();

    var studies = testCv.Education?
        .Select(study => new EducationEntry
        {
            Institution = study.Institution ?? "Unknown",
            Qualification = study.Qualification,
            Subject = study.Subject,
            StartDate = ParseDate(study.StartDate),
            EndDate = ParseDate(study.EndDate)
        })
        .ToList();

    return new CVData
    {
        FullName = personal?.Name ?? "Unknown",
        Email = personal?.Email,
        Phone = personal?.Phone,
        Employment = jobs ?? [],
        Education = studies ?? [],
        Skills = testCv.Skills ?? []
    };
}
|
||||
|
||||
/// <summary>
/// Converts a date string into a <see cref="DateOnly"/>.
/// </summary>
/// <param name="dateStr">
/// Date text, either in <c>YYYY-MM</c> form or in any form accepted by
/// <c>DateOnly.TryParse</c>. May be null or empty.
/// </param>
/// <returns>
/// The parsed date (the first day of the month for <c>YYYY-MM</c> input), or <c>null</c>
/// when the input is null, empty, or does not describe a valid date.
/// </returns>
static DateOnly? ParseDate(string? dateStr)
{
    if (string.IsNullOrEmpty(dateStr)) return null;

    // Try parsing YYYY-MM format. The numeric parts are range-checked before the
    // DateOnly is constructed: the constructor throws ArgumentOutOfRangeException
    // for values such as month 13 or year 0, and malformed input should yield null
    // like every other unparseable string rather than abort the caller.
    if (dateStr.Length == 7 && dateStr[4] == '-'
        && int.TryParse(dateStr[..4], out var year)
        && int.TryParse(dateStr[5..], out var month)
        && year >= DateOnly.MinValue.Year && year <= DateOnly.MaxValue.Year
        && month is >= 1 and <= 12)
    {
        return new DateOnly(year, month, 1);
    }

    // Try standard parsing (uses the current culture, so day-first or month-first
    // ordering follows the machine's regional settings).
    return DateOnly.TryParse(dateStr, out var date) ? date : null;
}
|
||||
|
||||
static void ConfigureServices(IServiceCollection services)
|
||||
{
|
||||
// Load configuration - try multiple locations
|
||||
@@ -263,7 +403,7 @@ class Program
|
||||
};
|
||||
|
||||
var webProjectPath = configPaths.FirstOrDefault(Directory.Exists) ?? "/git/RealCV/src/RealCV.Web";
|
||||
Console.WriteLine($"Loading config from: {webProjectPath}");
|
||||
Log($"Loading config from: {webProjectPath}");
|
||||
|
||||
var configuration = new ConfigurationBuilder()
|
||||
.SetBasePath(webProjectPath)
|
||||
@@ -272,11 +412,14 @@ class Program
|
||||
.AddJsonFile("appsettings.Production.json", optional: true)
|
||||
.Build();
|
||||
|
||||
// Logging - minimal output
|
||||
// Logging - show info level for verification details
|
||||
services.AddLogging(builder =>
|
||||
{
|
||||
builder.AddConsole();
|
||||
builder.SetMinimumLevel(LogLevel.Warning);
|
||||
builder.SetMinimumLevel(LogLevel.Information);
|
||||
// Filter out noisy libraries
|
||||
builder.AddFilter("Microsoft", LogLevel.Warning);
|
||||
builder.AddFilter("System", LogLevel.Warning);
|
||||
});
|
||||
|
||||
// Database
|
||||
|
||||
Reference in New Issue
Block a user