Compare commits
11 Commits
358b0328e7
...
develop
| Author | SHA1 | Date | |
|---|---|---|---|
| 135e774f71 | |||
| 45812420f5 | |||
| 883d9afa2d | |||
| 983fb5bd67 | |||
| 232036746f | |||
| 2a96a4bfaf | |||
| 4b87af80a8 | |||
| 9cb8c35616 | |||
| 3d666d5f9c | |||
| 94ca6e1b9a | |||
| 27921d625f |
448
src/RealCV.Application/Data/UKHistoricalEmployers.cs
Normal file
448
src/RealCV.Application/Data/UKHistoricalEmployers.cs
Normal file
@@ -0,0 +1,448 @@
|
|||||||
|
namespace RealCV.Application.Data;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Database of historical UK employers that may no longer exist under their original names.
|
||||||
|
/// Includes companies that were acquired, merged, dissolved, or renamed.
|
||||||
|
/// Also includes public sector bodies and internal divisions of larger organisations.
|
||||||
|
/// </summary>
|
||||||
|
public static class UKHistoricalEmployers
|
||||||
|
{
|
||||||
|
/// <summary>
/// Maps historical company names to their current/successor company information.
/// Key: historical name (looked up case-insensitively).
/// Value: <see cref="HistoricalEmployerInfo"/> with successor name, notes and company number.
/// </summary>
/// <remarks>
/// Some entries describe companies that are still trading (see their notes); they are kept here
/// so that older or alternative spellings of the name still resolve.
/// A <c>null</c> company number means no number is recorded for that entry.
/// </remarks>
public static readonly Dictionary<string, HistoricalEmployerInfo> HistoricalCompanies =
    new(StringComparer.OrdinalIgnoreCase)
    {
        // Engineering & Construction
        ["Foster Wheeler"] = new("Wood Group / AMEC Foster Wheeler", "Engineering contractor acquired by AMEC in 2014, now part of Wood Group", "00163609"),
        ["Foster Wheeler Ltd"] = new("Wood Group / AMEC Foster Wheeler", "Engineering contractor acquired by AMEC in 2014, now part of Wood Group", "00163609"),
        ["Foster Wheeler Limited"] = new("Wood Group / AMEC Foster Wheeler", "Engineering contractor acquired by AMEC in 2014, now part of Wood Group", "00163609"),
        ["Foster Wheeler PLC"] = new("Wood Group / AMEC Foster Wheeler", "Engineering contractor acquired by AMEC in 2014, now part of Wood Group", "00163609"),
        ["Sir Alexander Gibb and Partners"] = new("Jacobs Engineering", "Historic engineering consultancy (founded 1922), acquired by Jacobs", null),
        ["Alexander Gibb and Partners"] = new("Jacobs Engineering", "Historic engineering consultancy (founded 1922), acquired by Jacobs", null),
        ["Gibb and Partners"] = new("Jacobs Engineering", "Historic engineering consultancy, acquired by Jacobs", null),
        ["Mott MacDonald"] = new("Mott MacDonald", "Still trading - major engineering consultancy", "01243967"),
        ["Ove Arup"] = new("Arup", "Still trading as Arup", "01312453"),
        ["Arup"] = new("Arup", "Major engineering consultancy", "01312453"),
        ["WS Atkins"] = new("SNC-Lavalin / Atkins", "Acquired by SNC-Lavalin in 2017", "01885586"),
        ["Atkins"] = new("SNC-Lavalin / Atkins", "Acquired by SNC-Lavalin in 2017", "01885586"),

        // Pharmaceuticals
        ["Glaxo"] = new("GlaxoSmithKline (GSK)", "Merged with SmithKline Beecham in 2000 to form GSK", "03888792"),
        ["Glaxo Research & Development"] = new("GlaxoSmithKline (GSK)", "Glaxo R&D subsidiary, merged into GSK in 2000", "03888792"),
        ["Glaxo Research & Development Ltd"] = new("GlaxoSmithKline (GSK)", "Glaxo R&D subsidiary, merged into GSK in 2000", "03888792"),
        ["Glaxo Research and Development"] = new("GlaxoSmithKline (GSK)", "Glaxo R&D subsidiary, merged into GSK in 2000", "03888792"),
        ["Glaxo Wellcome"] = new("GlaxoSmithKline (GSK)", "Formed 1995 (Glaxo + Wellcome), merged with SmithKline Beecham 2000", "03888792"),
        ["SmithKline Beecham"] = new("GlaxoSmithKline (GSK)", "Merged with Glaxo Wellcome in 2000 to form GSK", "03888792"),
        ["Beecham"] = new("GlaxoSmithKline (GSK)", "Merged to form SmithKline Beecham, then GSK", "03888792"),
        ["Wellcome"] = new("GlaxoSmithKline (GSK)", "Acquired by Glaxo in 1995", "03888792"),
        ["ICI Pharmaceuticals"] = new("AstraZeneca", "ICI pharma division became Zeneca, merged with Astra 1999", "02723534"),
        ["Zeneca"] = new("AstraZeneca", "Merged with Astra in 1999", "02723534"),

        // Banking & Finance (historical names)
        ["Midland Bank"] = new("HSBC UK", "Acquired by HSBC in 1992", "00014259"),
        ["National Westminster Bank"] = new("NatWest (RBS Group)", "Acquired by RBS in 2000", "00929027"),
        ["NatWest"] = new("NatWest Group", "Part of NatWest Group (formerly RBS)", "00929027"),
        ["Lloyds Bank"] = new("Lloyds Banking Group", "Part of Lloyds Banking Group", "00002065"),
        ["Lloyds TSB"] = new("Lloyds Banking Group", "Rebranded to Lloyds Bank in 2013", "00002065"),
        ["TSB"] = new("TSB Bank", "Demerged from Lloyds in 2013, acquired by Sabadell", "SC205310"),
        ["Halifax"] = new("Halifax (Lloyds Banking Group)", "Part of Lloyds Banking Group since 2009", "02367076"),
        ["HBOS"] = new("Lloyds Banking Group", "Acquired by Lloyds in 2009", "SC218813"),
        ["Bank of Scotland"] = new("Bank of Scotland (Lloyds Banking Group)", "Part of Lloyds Banking Group", "SC327000"),
        ["Abbey National"] = new("Santander UK", "Acquired by Santander in 2004", "02294747"),
        ["Alliance & Leicester"] = new("Santander UK", "Acquired by Santander in 2008", "03263713"),
        ["Bradford & Bingley"] = new("Santander UK (savings) / UKAR (mortgages)", "Nationalised 2008, split up", "00189520"),
        ["Northern Rock"] = new("Virgin Money UK", "Nationalised 2008, sold to Virgin Money 2012", "03273685"),

        // Retail
        ["Woolworths"] = new("Dissolved", "UK Woolworths went into administration in 2008", "00106966"),
        ["British Home Stores"] = new("Dissolved", "BHS went into administration in 2016", "00229606"),
        ["BHS"] = new("Dissolved", "BHS went into administration in 2016", "00229606"),
        ["Littlewoods"] = new("Shop Direct / The Very Group", "Stores closed, online business continued", null),
        // The previous value here ("00abortedte") was not a valid company number; recorded as
        // unknown until the real number is confirmed.
        ["Comet"] = new("Dissolved", "Electrical retailer went into administration in 2012", null),
        ["MFI"] = new("Dissolved", "Furniture retailer went into administration in 2008", null),
        ["Courts"] = new("Dissolved", "Furniture retailer ceased UK operations", null),
        ["Safeway"] = new("Morrisons", "UK stores acquired by Morrisons in 2004", "00358949"),
        ["Kwik Save"] = new("Dissolved", "Supermarket chain dissolved in 2007", null),
        ["Fine Fare"] = new("Dissolved", "Supermarket chain - stores sold to various buyers", null),
        ["Gateway"] = new("Somerfield / Co-op", "Became Somerfield, then acquired by Co-op", null),
        ["Somerfield"] = new("Co-operative Group", "Acquired by Co-op in 2009", null),

        // Telecoms
        ["British Telecom"] = new("BT Group", "Rebranded to BT", "01800000"),
        ["GPO Telephones"] = new("BT Group", "Became British Telecom, then BT", "01800000"),
        ["Mercury Communications"] = new("Cable & Wireless / Vodafone", "Merged into Cable & Wireless, later Vodafone", null),
        ["Cellnet"] = new("O2 (Virgin Media O2)", "Became BT Cellnet, then O2", null),
        ["Orange"] = new("EE (BT)", "Merged with T-Mobile to form EE, acquired by BT", null),
        ["T-Mobile UK"] = new("EE (BT)", "Merged with Orange to form EE", null),
        ["One2One"] = new("EE (BT)", "Became T-Mobile UK, then EE", null),

        // Utilities
        ["Central Electricity Generating Board"] = new("National Grid / Various generators", "CEGB privatised and split in 1990", null),
        ["CEGB"] = new("National Grid / Various generators", "CEGB privatised and split in 1990", null),
        ["British Gas"] = new("Centrica / National Grid", "Demerged in 1997", "00029782"),
        ["Eastern Electricity"] = new("EDF Energy", "Privatised, now part of EDF", null),
        ["London Electricity"] = new("EDF Energy", "Privatised, now part of EDF", null),
        ["SEEBOARD"] = new("EDF Energy", "Privatised, now part of EDF", null),
        ["PowerGen"] = new("E.ON UK", "Acquired by E.ON", null),
        ["National Power"] = new("RWE npower / Innogy", "Split and acquired", null),

        // Manufacturing & Industrial
        ["British Steel"] = new("Tata Steel UK / British Steel (2016)", "Privatised, acquired by Corus then Tata, British Steel name revived 2016", "12303256"),
        ["British Steel Corporation"] = new("Tata Steel UK / British Steel (2016)", "Nationalised steel industry, privatised 1988", "12303256"),
        ["British Steel plc"] = new("Tata Steel UK / British Steel (2016)", "Merged with Hoogovens to form Corus 1999", "12303256"),
        ["Corus"] = new("Tata Steel UK", "Acquired by Tata Steel in 2007", null),
        ["British Leyland"] = new("Various (BMW, Tata, etc.)", "Split up - brands went to various owners", null),
        ["Rover Group"] = new("Dissolved", "Final owner MG Rover went bankrupt 2005", null),
        ["MG Rover"] = new("Dissolved", "Went into administration in 2005", null),
        ["Austin Rover"] = new("Dissolved", "Part of British Leyland, became Rover Group", null),
        ["British Aerospace"] = new("BAE Systems", "Merged with Marconi Electronic Systems in 1999", "01470151"),
        ["BAe"] = new("BAE Systems", "Merged with Marconi Electronic Systems in 1999", "01470151"),
        ["Marconi"] = new("BAE Systems / Ericsson", "Defence division to BAE, telecoms to Ericsson", null),
        ["GEC"] = new("Various", "General Electric Company (UK) - broken up", null),
        ["GEC Marconi"] = new("BAE Systems", "Defence business became part of BAE Systems", "01470151"),
        ["Plessey"] = new("Siemens / various", "Broken up in 1989", null),
        ["ICL"] = new("Fujitsu", "Acquired by Fujitsu", null),
        ["International Computers Limited"] = new("Fujitsu", "Acquired by Fujitsu in 2002", null),
        ["Ferranti"] = new("Dissolved", "Collapsed in 1993 after fraud scandal", null),

        // Oil & Gas
        ["British Petroleum"] = new("BP", "Rebranded to BP", "00102498"),
        ["BP Amoco"] = new("BP", "Merged 1998, rebranded to just BP", "00102498"),
        ["Enterprise Oil"] = new("Shell", "Acquired by Shell in 2002", null),
        ["Lasmo"] = new("Eni", "Acquired by Eni in 2001", null),
        ["Britoil"] = new("BP", "Acquired by BP in 1988", null),

        // Transport
        ["British Rail"] = new("Various (Network Rail, TOCs)", "Privatised and split in 1990s", null),
        ["British Railways"] = new("Various (Network Rail, TOCs)", "Became British Rail, then privatised", null),
        ["Railtrack"] = new("Network Rail", "Replaced by Network Rail in 2002", "04402220"),
        ["British Airways"] = new("British Airways (IAG)", "Now part of International Airlines Group", "01777777"),
        ["British Caledonian"] = new("British Airways", "Acquired by BA in 1987", null),
        ["British European Airways"] = new("British Airways", "Merged with BOAC to form BA in 1974", null),
        ["BEA"] = new("British Airways", "Merged with BOAC to form BA in 1974", null),
        ["BOAC"] = new("British Airways", "Merged with BEA to form BA in 1974", null),
        ["British Overseas Airways Corporation"] = new("British Airways", "Merged with BEA to form BA in 1974", null),
        ["Dan-Air"] = new("British Airways", "Acquired by BA in 1992", null),

        // Media
        ["Thames Television"] = new("Fremantle", "Lost franchise 1991, production continued", null),
        ["Granada Television"] = new("ITV plc", "Merged to form ITV plc", "04967001"),
        ["Carlton Television"] = new("ITV plc", "Merged with Granada to form ITV", "04967001"),
        ["Yorkshire Television"] = new("ITV plc", "Part of ITV plc", "04967001"),
        ["Tyne Tees Television"] = new("ITV plc", "Part of ITV plc", "04967001"),
        ["Central Television"] = new("ITV plc", "Part of ITV plc", "04967001"),
        ["Anglia Television"] = new("ITV plc", "Part of ITV plc", "04967001"),
        ["HTV"] = new("ITV plc", "Part of ITV plc", "04967001"),
        ["LWT"] = new("ITV plc", "London Weekend Television, part of ITV", "04967001"),
        ["London Weekend Television"] = new("ITV plc", "Part of ITV plc", "04967001"),

        // Construction
        ["Wimpey"] = new("Taylor Wimpey", "Merged with Taylor Woodrow in 2007", "00296805"),
        ["Taylor Woodrow"] = new("Taylor Wimpey", "Merged with Wimpey in 2007", "00296805"),
        ["John Laing"] = new("John Laing Group (infrastructure)", "Construction sold, now infrastructure investor", "05975300"),
        ["Costain Group"] = new("Costain", "Still trading", "00102921"),
        ["Tarmac"] = new("Tarmac (CRH)", "Construction now part of CRH", null),
        ["Alfred McAlpine"] = new("Carillion (dissolved)", "Acquired by Carillion, which collapsed 2018", null),
        ["Carillion"] = new("Dissolved", "Collapsed into liquidation in 2018", "03782379"),
        ["Mowlem"] = new("Carillion (dissolved)", "Acquired by Carillion in 2006", null),
        ["Balfour Beatty"] = new("Balfour Beatty", "Still trading", "00395826"),

        // Insurance
        ["Royal Insurance"] = new("RSA Insurance Group", "Merged with Sun Alliance", "02339826"),
        ["Sun Alliance"] = new("RSA Insurance Group", "Merged with Royal Insurance", "02339826"),
        ["Guardian Royal Exchange"] = new("AXA", "Acquired by AXA in 1999", null),
        ["Commercial Union"] = new("Aviva", "Merged to form CGU, then Aviva", "02468686"),
        ["General Accident"] = new("Aviva", "Merged to form CGU, then Aviva", "02468686"),
        ["CGU"] = new("Aviva", "Rebranded to Aviva in 2002", "02468686"),
        ["Norwich Union"] = new("Aviva", "Rebranded to Aviva in 2009", "02468686"),
        ["Eagle Star"] = new("Zurich", "Acquired by Zurich", null),
        ["Prudential"] = new("Prudential plc / M&G", "UK business demerged as M&G plc", "01397169"),
    };
|
||||||
|
|
||||||
|
/// <summary>
/// Major UK charities and non-profit organisations.
/// These are legitimate employers but may not be found via standard company search.
/// </summary>
/// <remarks>
/// Membership is case-insensitive. Each name is listed once; "Salvation Army" is both a charity
/// and a religious organisation and is kept under the charities heading only.
/// </remarks>
public static readonly HashSet<string> CharityEmployers = new(StringComparer.OrdinalIgnoreCase)
{
    // Youth organisations
    "Girlguiding",
    "Girlguiding UK",
    "Girlguiding North East England",
    "Girl Guides",
    "Scouts",
    "Scout Association",
    "Boys Brigade",
    "Girls Brigade",
    "Cadets",
    "Sea Cadets",
    "Air Cadets",
    "Army Cadets",

    // Major charities
    "British Red Cross",
    "Oxfam",
    "Save the Children",
    "NSPCC",
    "Barnardo's",
    "RSPCA",
    "RSPB",
    "National Trust",
    "Cancer Research UK",
    "British Heart Foundation",
    "Macmillan Cancer Support",
    "Marie Curie",
    "Age UK",
    "Mind",
    "Samaritans",
    "Shelter",
    "Citizens Advice",
    "Citizens Advice Bureau",
    "CAB",
    "St John Ambulance",
    "Salvation Army",
    "YMCA",
    "YWCA",

    // Religious organisations
    "Church of England",
    "Catholic Church",
    "Methodist Church",
    "Baptist Church",
};
|
||||||
|
|
||||||
|
/// <summary>
/// Names (and name fragments) of public sector organisations and government bodies.
/// These are legitimate employers but not registered at Companies House.
/// </summary>
/// <remarks>
/// Comparison is case-insensitive. Generic entries such as "London Borough" or "University of"
/// are fragments intended for the partial matching done by <see cref="IsPublicSectorEmployer"/>.
/// </remarks>
public static readonly HashSet<string> PublicSectorEmployers =
    new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        // Emergency Services
        "Metropolitan Police", "Metropolitan Police Service", "Metropolitan Police Engineers", "Met Police",
        "City of London Police", "British Transport Police", "Police Scotland",
        "Police Service of Northern Ireland", "PSNI",
        "London Fire Brigade", "London Ambulance Service",
        "NHS", "National Health Service",

        // Government Departments
        "HM Treasury", "Home Office", "Foreign Office",
        "Ministry of Defence", "MOD",
        "Department of Health",
        "Department for Education", "DfE",
        "Department for Work and Pensions", "DWP",
        "HMRC", "HM Revenue and Customs",
        "Cabinet Office", "DVLA", "DVSA", "Environment Agency",
        "Highways Agency", "Highways England", "National Highways",

        // Armed Forces
        "British Army", "Royal Navy", "Royal Air Force", "RAF", "Royal Marines",

        // Local Government
        "London Borough", "County Council", "City Council", "District Council",
        "Metropolitan Borough", "Borough Council", "Town Council", "Parish Council",
        "Greater London Council", "GLC",

        // Education
        "University of", "College of", "School of",

        // Other Public Bodies
        "BBC", "British Broadcasting Corporation", "Channel 4",
        "Bank of England", "Royal Mail", "Post Office",
        "Transport for London", "TfL", "Network Rail",
        "Ordnance Survey", "Land Registry", "Companies House", "National Archives",
        "British Library", "British Museum", "National Gallery", "Tate",
        "Natural History Museum", "Science Museum",
        "V&A", "Victoria and Albert Museum",
    };
|
||||||
|
|
||||||
|
/// <summary>
/// Known internal divisions or departments, mapped to the parent company they belong to.
/// These are legitimate employer references but won't be separately registered.
/// </summary>
/// <remarks>
/// Keys are compared case-insensitively. Key: division name as it may appear on a CV.
/// Value: name of the parent organisation.
/// </remarks>
public static readonly Dictionary<string, string> DivisionPatterns =
    new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase)
    {
        // Airlines
        { "British Airways Technical Support", "British Airways" },
        { "BA Technical Support", "British Airways" },
        { "BA Engineering", "British Airways" },
        { "British Airways Engineering", "British Airways" },
        { "FBA - British Airways", "British Airways" },

        // Major employers with divisions
        { "BBC News", "BBC" },
        { "BBC World Service", "BBC" },
        { "BBC Studios", "BBC" },
        { "ITV News", "ITV plc" },
        { "Sky News", "Sky UK" },
        { "BT Openreach", "BT Group" },
        { "Openreach", "BT Group" },
        { "BT Research", "BT Group" },
        { "Shell Research", "Shell" },
        { "BP Research", "BP" },
        { "Rolls-Royce Aerospace", "Rolls-Royce" },
        { "Rolls-Royce Marine", "Rolls-Royce" },
        { "BAE Systems Naval Ships", "BAE Systems" },
        { "BAE Systems Submarines", "BAE Systems" },

        // Banks - divisions
        { "Barclays Investment Bank", "Barclays" },
        { "Barclays Capital", "Barclays" },
        { "HSBC Investment Bank", "HSBC" },
        { "Lloyds Commercial Banking", "Lloyds Banking Group" },
        { "NatWest Markets", "NatWest Group" },
        { "RBS Markets", "NatWest Group" },
    };
|
||||||
|
|
||||||
|
/// <summary>
/// Reports whether <paramref name="employerName"/> is a key of <see cref="HistoricalCompanies"/>.
/// </summary>
/// <param name="employerName">Employer name to look up; surrounding whitespace is ignored.</param>
/// <returns>
/// <c>true</c> when the trimmed name is a known historical company (case-insensitive exact match);
/// <c>false</c> otherwise, including for null, empty or whitespace-only input.
/// </returns>
public static bool IsHistoricalEmployer(string employerName) =>
    !string.IsNullOrWhiteSpace(employerName)
    && HistoricalCompanies.ContainsKey(employerName.Trim());
|
||||||
|
|
||||||
|
/// <summary>
/// Looks up the successor details recorded for a historical employer.
/// </summary>
/// <param name="employerName">Employer name to look up; surrounding whitespace is ignored.</param>
/// <returns>
/// The matching <see cref="HistoricalEmployerInfo"/> from <see cref="HistoricalCompanies"/>
/// (case-insensitive exact match), or <c>null</c> when the name is blank or not listed.
/// </returns>
public static HistoricalEmployerInfo? GetHistoricalEmployerInfo(string employerName)
{
    if (string.IsNullOrWhiteSpace(employerName))
    {
        return null;
    }

    var key = employerName.Trim();
    return HistoricalCompanies.TryGetValue(key, out var info) ? info : null;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Check if an employer is a public sector organisation.
/// </summary>
/// <param name="employerName">Employer name as written by the candidate.</param>
/// <returns>
/// <c>true</c> when the trimmed name equals an entry of <see cref="PublicSectorEmployers"/>, or
/// contains one as a whole phrase (e.g. "London Borough of Camden"); <c>false</c> otherwise,
/// including for null, empty or whitespace-only input.
/// </returns>
/// <remarks>
/// Partial matches must sit on word boundaries, so short entries such as "MOD" or "RAF" do not
/// match inside unrelated words ("Modern Designs", "Aircraft Spares").
/// </remarks>
public static bool IsPublicSectorEmployer(string employerName)
{
    if (string.IsNullOrWhiteSpace(employerName))
    {
        return false;
    }

    var name = employerName.Trim();

    // Exact hit first: a single hash lookup.
    if (PublicSectorEmployers.Contains(name))
    {
        return true;
    }

    // Otherwise look for any entry embedded in the name, e.g. "Leeds City Council".
    foreach (var pattern in PublicSectorEmployers)
    {
        if (ContainsWholePhrase(name, pattern))
        {
            return true;
        }
    }

    return false;
}

/// <summary>
/// Check if an employer is a charity or non-profit organisation.
/// </summary>
/// <param name="employerName">Employer name as written by the candidate.</param>
/// <returns>
/// <c>true</c> when the trimmed name equals an entry of <see cref="CharityEmployers"/>, or
/// contains one as a whole phrase (e.g. "Oxfam GB"); <c>false</c> otherwise, including for
/// null, empty or whitespace-only input.
/// </returns>
/// <remarks>
/// Partial matches must sit on word boundaries, so "Mind" does not match "Mindshare" and
/// "CAB" does not match "Cable".
/// </remarks>
public static bool IsCharityEmployer(string employerName)
{
    if (string.IsNullOrWhiteSpace(employerName))
    {
        return false;
    }

    var name = employerName.Trim();

    // Exact hit first: a single hash lookup.
    if (CharityEmployers.Contains(name))
    {
        return true;
    }

    foreach (var pattern in CharityEmployers)
    {
        if (ContainsWholePhrase(name, pattern))
        {
            return true;
        }
    }

    return false;
}

/// <summary>
/// Check if an employer name is an internal division and get the parent company.
/// </summary>
/// <param name="employerName">Employer name as written by the candidate.</param>
/// <returns>
/// The parent company from <see cref="DivisionPatterns"/> when the trimmed name equals a known
/// division or contains one as a whole phrase; <c>null</c> when nothing matches or the input is
/// null, empty or whitespace-only.
/// </returns>
/// <remarks>
/// Partial matches must sit on word boundaries, so "Debt Research Ltd" is not attributed to
/// "BT Research". When several divisions match, the first one enumerated wins.
/// </remarks>
public static string? GetParentCompanyForDivision(string employerName)
{
    if (string.IsNullOrWhiteSpace(employerName))
    {
        return null;
    }

    var name = employerName.Trim();

    // Exact hit first: a single hash lookup.
    if (DivisionPatterns.TryGetValue(name, out var parent))
    {
        return parent;
    }

    foreach (var entry in DivisionPatterns)
    {
        if (ContainsWholePhrase(name, entry.Key))
        {
            return entry.Value;
        }
    }

    return null;
}

/// <summary>
/// Case-insensitive search for <paramref name="phrase"/> inside <paramref name="text"/> that only
/// accepts occurrences delimited by the start/end of the text or by a character that is neither
/// a letter nor a digit.
/// </summary>
private static bool ContainsWholePhrase(string text, string phrase)
{
    if (phrase.Length == 0)
    {
        return false;
    }

    var searchFrom = 0;
    while (searchFrom <= text.Length - phrase.Length)
    {
        var index = text.IndexOf(phrase, searchFrom, StringComparison.OrdinalIgnoreCase);
        if (index < 0)
        {
            return false;
        }

        var end = index + phrase.Length;
        var startsOnBoundary = index == 0 || !char.IsLetterOrDigit(text[index - 1]);
        var endsOnBoundary = end == text.Length || !char.IsLetterOrDigit(text[end]);
        if (startsOnBoundary && endsOnBoundary)
        {
            return true;
        }

        // This occurrence was embedded in a longer word; keep scanning after it.
        searchFrom = index + 1;
    }

    return false;
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Immutable description of what became of a historical employer.
/// </summary>
/// <param name="SuccessorName">Name of the current or successor organisation, or a status such as "Dissolved".</param>
/// <param name="Notes">Short free-text explanation of the company's history.</param>
/// <param name="CompanyNumber">Company registration number when one is recorded; otherwise <c>null</c>.</param>
public sealed record HistoricalEmployerInfo(string SuccessorName, string Notes, string? CompanyNumber);
|
||||||
@@ -122,6 +122,28 @@ public static class UKInstitutions
|
|||||||
"Wrexham University",
|
"Wrexham University",
|
||||||
"York St John University",
|
"York St John University",
|
||||||
|
|
||||||
|
// Post-1992 Universities (former polytechnics)
|
||||||
|
"Leeds Beckett University",
|
||||||
|
"Birmingham City University",
|
||||||
|
"University of Bedfordshire",
|
||||||
|
"Anglia Ruskin University",
|
||||||
|
"University of Central Lancashire",
|
||||||
|
"University of West London",
|
||||||
|
"University of Northampton",
|
||||||
|
"University of Chichester",
|
||||||
|
"Plymouth Marjon University",
|
||||||
|
"Bath Spa University",
|
||||||
|
"Solent University",
|
||||||
|
"University of Bolton",
|
||||||
|
"University of Cumbria",
|
||||||
|
"University of Chester",
|
||||||
|
"University of Gloucestershire",
|
||||||
|
"University of Suffolk",
|
||||||
|
"Newman University",
|
||||||
|
"Bishop Grosseteste University",
|
||||||
|
"Harper Adams University",
|
||||||
|
"Royal Agricultural University",
|
||||||
|
|
||||||
// Scottish Universities
|
// Scottish Universities
|
||||||
"University of Aberdeen",
|
"University of Aberdeen",
|
||||||
"Abertay University",
|
"Abertay University",
|
||||||
@@ -134,6 +156,8 @@ public static class UKInstitutions
|
|||||||
"Bangor University",
|
"Bangor University",
|
||||||
"University of South Wales",
|
"University of South Wales",
|
||||||
"Wrexham Glyndwr University",
|
"Wrexham Glyndwr University",
|
||||||
|
"Wrexham University",
|
||||||
|
"Cardiff Metropolitan University",
|
||||||
|
|
||||||
// Northern Ireland
|
// Northern Ireland
|
||||||
"Ulster University",
|
"Ulster University",
|
||||||
@@ -304,6 +328,112 @@ public static class UKInstitutions
|
|||||||
["South Bank University"] = "London South Bank University",
|
["South Bank University"] = "London South Bank University",
|
||||||
["LSBU"] = "London South Bank University",
|
["LSBU"] = "London South Bank University",
|
||||||
|
|
||||||
|
// Historical polytechnic names (became universities in 1992)
|
||||||
|
// These are legitimate institutions that existed under different names
|
||||||
|
["South Bank Polytechnic"] = "London South Bank University",
|
||||||
|
["Polytechnic of the South Bank"] = "London South Bank University",
|
||||||
|
["Thames Polytechnic"] = "University of Greenwich",
|
||||||
|
["Woolwich Polytechnic"] = "University of Greenwich",
|
||||||
|
["Polytechnic of Central London"] = "University of Westminster",
|
||||||
|
["PCL"] = "University of Westminster",
|
||||||
|
["Polytechnic of North London"] = "London Metropolitan University",
|
||||||
|
["City of London Polytechnic"] = "London Metropolitan University",
|
||||||
|
["London Guildhall University"] = "London Metropolitan University",
|
||||||
|
["University of North London"] = "London Metropolitan University",
|
||||||
|
["Polytechnic of East London"] = "University of East London",
|
||||||
|
["North East London Polytechnic"] = "University of East London",
|
||||||
|
["Middlesex Polytechnic"] = "Middlesex University",
|
||||||
|
["Hatfield Polytechnic"] = "University of Hertfordshire",
|
||||||
|
["Sheffield Polytechnic"] = "Sheffield Hallam University",
|
||||||
|
["Sheffield City Polytechnic"] = "Sheffield Hallam University",
|
||||||
|
["Manchester Polytechnic"] = "Manchester Metropolitan University",
|
||||||
|
["Leeds Polytechnic"] = "Leeds Beckett University",
|
||||||
|
["Leeds Metropolitan University"] = "Leeds Beckett University",
|
||||||
|
["Leicester Polytechnic"] = "De Montfort University",
|
||||||
|
["Coventry Polytechnic"] = "Coventry University",
|
||||||
|
["Lanchester Polytechnic"] = "Coventry University",
|
||||||
|
["Brighton Polytechnic"] = "University of Brighton",
|
||||||
|
["Portsmouth Polytechnic"] = "University of Portsmouth",
|
||||||
|
["Plymouth Polytechnic"] = "University of Plymouth",
|
||||||
|
["Polytechnic South West"] = "University of Plymouth",
|
||||||
|
["Oxford Polytechnic"] = "Oxford Brookes University",
|
||||||
|
["Newcastle Polytechnic"] = "Northumbria University",
|
||||||
|
["Newcastle upon Tyne Polytechnic"] = "Northumbria University",
|
||||||
|
["Sunderland Polytechnic"] = "University of Sunderland",
|
||||||
|
["Teesside Polytechnic"] = "Teesside University",
|
||||||
|
["Huddersfield Polytechnic"] = "University of Huddersfield",
|
||||||
|
["Wolverhampton Polytechnic"] = "University of Wolverhampton",
|
||||||
|
["Liverpool Polytechnic"] = "Liverpool John Moores University",
|
||||||
|
["Bristol Polytechnic"] = "University of the West of England",
|
||||||
|
["Kingston Polytechnic"] = "Kingston University",
|
||||||
|
["Nottingham Polytechnic"] = "Nottingham Trent University",
|
||||||
|
["Trent Polytechnic"] = "Nottingham Trent University",
|
||||||
|
["Birmingham Polytechnic"] = "Birmingham City University",
|
||||||
|
["City of Birmingham Polytechnic"] = "Birmingham City University",
|
||||||
|
["University of Central England"] = "Birmingham City University",
|
||||||
|
["UCE Birmingham"] = "Birmingham City University",
|
||||||
|
["Staffordshire Polytechnic"] = "Staffordshire University",
|
||||||
|
["North Staffordshire Polytechnic"] = "Staffordshire University",
|
||||||
|
["Luton College of Higher Education"] = "University of Bedfordshire",
|
||||||
|
["University of Luton"] = "University of Bedfordshire",
|
||||||
|
["Anglia Polytechnic"] = "Anglia Ruskin University",
|
||||||
|
["Anglia Polytechnic University"] = "Anglia Ruskin University",
|
||||||
|
["APU"] = "Anglia Ruskin University",
|
||||||
|
["Cambridgeshire College of Arts and Technology"] = "Anglia Ruskin University",
|
||||||
|
["CCAT"] = "Anglia Ruskin University",
|
||||||
|
["Bournemouth Polytechnic"] = "Bournemouth University",
|
||||||
|
["Dorset Institute of Higher Education"] = "Bournemouth University",
|
||||||
|
["Derby College of Higher Education"] = "University of Derby",
|
||||||
|
["Derbyshire College of Higher Education"] = "University of Derby",
|
||||||
|
["Humberside Polytechnic"] = "University of Lincoln",
|
||||||
|
["Humberside College of Higher Education"] = "University of Lincoln",
|
||||||
|
["University of Humberside"] = "University of Lincoln",
|
||||||
|
["University of Lincolnshire and Humberside"] = "University of Lincoln",
|
||||||
|
["Central Lancashire Polytechnic"] = "University of Central Lancashire",
|
||||||
|
["Preston Polytechnic"] = "University of Central Lancashire",
|
||||||
|
["Lancashire Polytechnic"] = "University of Central Lancashire",
|
||||||
|
["Glamorgan Polytechnic"] = "University of South Wales",
|
||||||
|
["Polytechnic of Wales"] = "University of South Wales",
|
||||||
|
["University of Glamorgan"] = "University of South Wales",
|
||||||
|
["Robert Gordon Institute of Technology"] = "Robert Gordon University",
|
||||||
|
["RGIT"] = "Robert Gordon University",
|
||||||
|
["Napier Polytechnic"] = "Edinburgh Napier University",
|
||||||
|
["Napier College"] = "Edinburgh Napier University",
|
||||||
|
["Glasgow Polytechnic"] = "Glasgow Caledonian University",
|
||||||
|
["Queen's College Glasgow"] = "Glasgow Caledonian University",
|
||||||
|
["Dundee Institute of Technology"] = "Abertay University",
|
||||||
|
["Dundee College of Technology"] = "Abertay University",
|
||||||
|
|
||||||
|
// Other historical name changes
|
||||||
|
["Roehampton Institute"] = "Roehampton University",
|
||||||
|
["University of Surrey Roehampton"] = "Roehampton University",
|
||||||
|
["Thames Valley University"] = "University of West London",
|
||||||
|
["Polytechnic of West London"] = "University of West London",
|
||||||
|
["Ealing College of Higher Education"] = "University of West London",
|
||||||
|
["London College of Music and Media"] = "University of West London",
|
||||||
|
["University College Northampton"] = "University of Northampton",
|
||||||
|
["Nene College"] = "University of Northampton",
|
||||||
|
["University College Worcester"] = "University of Worcester",
|
||||||
|
["Worcester College of Higher Education"] = "University of Worcester",
|
||||||
|
["University College Chichester"] = "University of Chichester",
|
||||||
|
["Chichester Institute of Higher Education"] = "University of Chichester",
|
||||||
|
["College of St Mark and St John"] = "Plymouth Marjon University",
|
||||||
|
["Marjon"] = "Plymouth Marjon University",
|
||||||
|
["University of St Mark and St John"] = "Plymouth Marjon University",
|
||||||
|
["University College Falmouth"] = "Falmouth University",
|
||||||
|
["Falmouth College of Arts"] = "Falmouth University",
|
||||||
|
["Bath College of Higher Education"] = "Bath Spa University",
|
||||||
|
["Bath Spa University College"] = "Bath Spa University",
|
||||||
|
["Liverpool Institute of Higher Education"] = "Liverpool Hope University",
|
||||||
|
["Liverpool Hope University College"] = "Liverpool Hope University",
|
||||||
|
["University of Wales, Newport"] = "University of South Wales",
|
||||||
|
["University of Wales Institute, Cardiff"] = "Cardiff Metropolitan University",
|
||||||
|
["UWIC"] = "Cardiff Metropolitan University",
|
||||||
|
["North East Wales Institute"] = "Wrexham University",
|
||||||
|
["NEWI"] = "Wrexham University",
|
||||||
|
["Glyndwr University"] = "Wrexham University",
|
||||||
|
["Wrexham Glyndwr University"] = "Wrexham University",
|
||||||
|
|
||||||
// Other common variations
|
// Other common variations
|
||||||
["Open University"] = "The Open University",
|
["Open University"] = "The Open University",
|
||||||
["OU"] = "The Open University",
|
["OU"] = "The Open University",
|
||||||
|
|||||||
@@ -8,8 +8,22 @@ public interface ICompanyNameMatcherService
|
|||||||
/// Uses AI to semantically compare a company name from a CV against Companies House candidates.
|
/// Uses AI to semantically compare a company name from a CV against Companies House candidates.
|
||||||
/// Returns the best match with confidence score and reasoning.
|
/// Returns the best match with confidence score and reasoning.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
/// <param name="cvCompanyName">The company name as written on the CV</param>
|
||||||
|
/// <param name="candidates">List of potential matches from Companies House</param>
|
||||||
|
/// <param name="industryHint">Optional industry context for well-known brands (e.g., "pharmacy/healthcare retail")</param>
|
||||||
|
/// <param name="cancellationToken">Cancellation token</param>
|
||||||
Task<SemanticMatchResult?> FindBestMatchAsync(
|
Task<SemanticMatchResult?> FindBestMatchAsync(
|
||||||
string cvCompanyName,
|
string cvCompanyName,
|
||||||
List<CompanyCandidate> candidates,
|
List<CompanyCandidate> candidates,
|
||||||
|
string? industryHint = null,
|
||||||
|
CancellationToken cancellationToken = default);
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Uses AI to detect if a company name contains multiple companies and extract them.
|
||||||
|
/// Returns null or single-item list if it's a single company (e.g., "Ernst & Young").
|
||||||
|
/// Returns multiple items if compound (e.g., "ASDA/WALMART" -> ["ASDA", "WALMART"]).
|
||||||
|
/// </summary>
|
||||||
|
Task<List<string>?> ExtractCompanyNamesAsync(
|
||||||
|
string companyName,
|
||||||
CancellationToken cancellationToken = default);
|
CancellationToken cancellationToken = default);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -92,6 +92,19 @@ public sealed class ProcessCVCheckJob
|
|||||||
"Parsed CV for check {CheckId}: {EmploymentCount} employment entries",
|
"Parsed CV for check {CheckId}: {EmploymentCount} employment entries",
|
||||||
cvCheckId, cvData.Employment.Count);
|
cvCheckId, cvData.Employment.Count);
|
||||||
|
|
||||||
|
// Validate that the CV contains meaningful data
|
||||||
|
// A CV with no name, no employment AND no education is likely a parsing failure
|
||||||
|
if (cvData.Employment.Count == 0 && cvData.Education.Count == 0 &&
|
||||||
|
(string.IsNullOrWhiteSpace(cvData.FullName) || cvData.FullName == "Unknown"))
|
||||||
|
{
|
||||||
|
_logger.LogWarning(
|
||||||
|
"CV check {CheckId} parsed with no extractable data - possible scanned/image PDF or parsing failure",
|
||||||
|
cvCheckId);
|
||||||
|
throw new InvalidOperationException(
|
||||||
|
"Could not extract any employment or education data from this CV. " +
|
||||||
|
"The file may be a scanned image, password-protected, or in an unsupported format.");
|
||||||
|
}
|
||||||
|
|
||||||
// Step 4: Save extracted data
|
// Step 4: Save extracted data
|
||||||
cvCheck.ExtractedDataJson = JsonSerializer.Serialize(cvData, JsonDefaults.CamelCaseIndented);
|
cvCheck.ExtractedDataJson = JsonSerializer.Serialize(cvData, JsonDefaults.CamelCaseIndented);
|
||||||
cvCheck.ProcessingStage = "Verifying Employment";
|
cvCheck.ProcessingStage = "Verifying Employment";
|
||||||
@@ -279,6 +292,8 @@ public sealed class ProcessCVCheckJob
|
|||||||
try
|
try
|
||||||
{
|
{
|
||||||
cvCheck.Status = CheckStatus.Failed;
|
cvCheck.Status = CheckStatus.Failed;
|
||||||
|
// Store a user-friendly error message
|
||||||
|
cvCheck.ProcessingStage = GetUserFriendlyErrorMessage(ex);
|
||||||
// Use CancellationToken.None to ensure failure status is saved even if original token is cancelled
|
// Use CancellationToken.None to ensure failure status is saved even if original token is cancelled
|
||||||
await _dbContext.SaveChangesAsync(CancellationToken.None);
|
await _dbContext.SaveChangesAsync(CancellationToken.None);
|
||||||
}
|
}
|
||||||
@@ -1411,4 +1426,39 @@ public sealed class ProcessCVCheckJob
|
|||||||
obj.FlagType?.ToUpperInvariant() ?? "");
|
obj.FlagType?.ToUpperInvariant() ?? "");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Translates a processing failure into wording that can be shown to the end user.
/// </summary>
/// <remarks>
/// The mapping is driven by case-insensitive fragments of <see cref="Exception.Message"/>,
/// not by the exception's type. Rules are evaluated top to bottom and the first hit wins.
/// </remarks>
/// <param name="ex">The exception that aborted CV processing.</param>
/// <returns>A fixed, user-facing sentence; a generic retry message when no rule applies.</returns>
private static string GetUserFriendlyErrorMessage(Exception ex)
{
    return ex.Message switch
    {
        // Nothing usable came out of the parser
        var text when Mentions(text, "no extractable data")
                      || Mentions(text, "Could not extract any employment")
            => "No useful data could be extracted from this CV. The file may be a scanned image or in an unsupported format.",

        // Upstream API throttling / quota
        var text when Mentions(text, "API usage limits")
                      || Mentions(text, "rate limit")
            => "Service temporarily unavailable. Please try again in a few minutes.",

        // Text extraction from the uploaded file failed
        var text when Mentions(text, "Could not extract text")
            => "Could not read the CV file. Please ensure it's a valid PDF or DOCX document.",

        // Protected documents
        var text when Mentions(text, "password")
                      || Mentions(text, "encrypted")
            => "This CV appears to be password-protected. Please upload an unprotected version.",

        // Anything else
        _ => "An error occurred while processing your CV. Please try uploading again."
    };

    static bool Mentions(string haystack, string fragment)
    {
        return haystack.Contains(fragment, StringComparison.OrdinalIgnoreCase);
    }
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -33,28 +33,53 @@ public sealed class AICompanyNameMatcherService : ICompanyNameMatcherService
|
|||||||
Compare the company name from a CV against official Companies House records.
|
Compare the company name from a CV against official Companies House records.
|
||||||
|
|
||||||
CV Company Name: "{CV_COMPANY}"
|
CV Company Name: "{CV_COMPANY}"
|
||||||
|
{INDUSTRY_CONTEXT}
|
||||||
Companies House Candidates:
|
Companies House Candidates:
|
||||||
{CANDIDATES}
|
{CANDIDATES}
|
||||||
|
|
||||||
Determine which candidate (if any) is the SAME company as the CV entry.
|
Determine which candidate (if any) is the SAME company as the CV entry.
|
||||||
|
|
||||||
Rules:
|
Matching Guidelines:
|
||||||
1. A match requires the companies to be the SAME organisation, not just similar names
|
1. MATCH if the CV name is the same organisation as a candidate (even if registered name differs):
|
||||||
2. "Families First CiC" is NOT the same as "FAMILIES AGAINST CONFORMITY LTD" - different words = different companies
|
- "Boots" → "BOOTS UK LIMITED" ✓ (trading name = registered company)
|
||||||
3. Trading names should match their registered entity (e.g., "Tesco" matches "TESCO PLC")
|
- "Boots" → "THE BOOTS COMPANY PLC" ✓ (trading name = parent company)
|
||||||
4. Subsidiaries can match if clearly the same organisation (e.g., "ASDA" could match "ASDA STORES LIMITED")
|
- "Tesco" → "TESCO PLC" ✓ (trading name = registered name)
|
||||||
5. Acronyms in parentheses are abbreviations of the full name (e.g., "North Halifax Partnership (NHP)" = "NORTH HALIFAX PARTNERSHIP")
|
- "ASDA" → "ASDA STORES LIMITED" ✓ (brand = operating company)
|
||||||
6. CiC/CIC = Community Interest Company, LLP = Limited Liability Partnership - these are legal suffixes
|
- "Legal & General" → "LEGAL & GENERAL GROUP PLC" ✓ (brand = holding company)
|
||||||
7. If the CV name contains all the key words of a candidate (ignoring Ltd/Limited/CIC/etc.), it's likely a match
|
- "Checkout.com" → "CHECKOUT.COM PAYMENTS LIMITED" ✓ (exact match)
|
||||||
8. If NO candidate is clearly the same company, return "NONE" as the best match
|
- "EY UK" → "ERNST & YOUNG LLP" ✓ (trading name = partnership)
|
||||||
|
- "Royal Bank of Scotland" → "THE ROYAL BANK OF SCOTLAND PUBLIC LIMITED COMPANY" ✓
|
||||||
|
|
||||||
|
2. DO NOT MATCH if the candidate adds significant DIFFERENT words that indicate a different business:
|
||||||
|
- "Boots" ≠ "BOOTS AND BEARDS" ✗ (pharmacy chain is NOT a barber/grooming business)
|
||||||
|
- "Legal & General" ≠ "LEGAL LIMITED" ✗ (major insurer is NOT a generic "legal" company)
|
||||||
|
- "Checkout.com" ≠ "XN CHECKOUT LIMITED" ✗ (fintech is NOT an unrelated checkout company)
|
||||||
|
- "EY UK" ≠ "EY UK GDPR REPRESENTATIVE LIMITED" ✗ (main employer, not a subsidiary)
|
||||||
|
|
||||||
|
3. KEY DISTINCTION - Geographic/legal suffixes are OK, but new business words are NOT:
|
||||||
|
- "Boots" → "BOOTS UK LIMITED" ✓ (UK is just geographic qualifier)
|
||||||
|
- "Boots" → "BOOTS AND BEARDS" ✗ (BEARDS indicates different business)
|
||||||
|
- "Meridian Holdings" → "MERIDIAN (THE ORIGINAL) LIMITED" ✗ ("THE ORIGINAL" suggests different business)
|
||||||
|
- "Paramount Consulting UK" → "PARAMOUNT LIMITED" ✗ (missing "Consulting" - different type)
|
||||||
|
- "Apex Technology Partners" → "APEX LIMITED" ✗ (missing "Technology Partners")
|
||||||
|
|
||||||
|
4. Legal suffixes (Ltd, Limited, PLC, LLP, CiC) should be ignored when comparing names
|
||||||
|
|
||||||
|
5. Adding "THE", "GROUP", "UK", or "HOLDINGS" to a name doesn't make it a different company
|
||||||
|
|
||||||
|
6. When the CV mentions a well-known brand, prefer the main operating/holding company over obscure matches
|
||||||
|
|
||||||
|
7. If INDUSTRY CONTEXT is provided, use it to reject candidates clearly in different industries
|
||||||
|
|
||||||
|
CRITICAL: Return the COMPLETE company number exactly as shown (e.g., "SC083026", "02366995").
|
||||||
|
Do NOT truncate or abbreviate the company number.
|
||||||
|
|
||||||
Respond with this exact JSON structure:
|
Respond with this exact JSON structure:
|
||||||
{
|
{
|
||||||
"bestMatchCompanyNumber": "string (company number of best match, or 'NONE' if no valid match)",
|
"bestMatchCompanyNumber": "COMPLETE company number from the list above, or 'NONE' if no valid match",
|
||||||
"confidenceScore": number (0-100, where 100 = certain match, 0 = no match),
|
"confidenceScore": number (0-100, where 100 = certain match, 0 = no match),
|
||||||
"matchType": "string (Exact, TradingName, Subsidiary, Parent, NoMatch)",
|
"matchType": "Exact|TradingName|Subsidiary|Parent|NoMatch",
|
||||||
"reasoning": "string (brief explanation of why this is or isn't a match)"
|
"reasoning": "brief explanation"
|
||||||
}
|
}
|
||||||
""";
|
""";
|
||||||
|
|
||||||
@@ -69,6 +94,7 @@ public sealed class AICompanyNameMatcherService : ICompanyNameMatcherService
|
|||||||
public async Task<SemanticMatchResult?> FindBestMatchAsync(
|
public async Task<SemanticMatchResult?> FindBestMatchAsync(
|
||||||
string cvCompanyName,
|
string cvCompanyName,
|
||||||
List<CompanyCandidate> candidates,
|
List<CompanyCandidate> candidates,
|
||||||
|
string? industryHint = null,
|
||||||
CancellationToken cancellationToken = default)
|
CancellationToken cancellationToken = default)
|
||||||
{
|
{
|
||||||
if (string.IsNullOrWhiteSpace(cvCompanyName) || candidates.Count == 0)
|
if (string.IsNullOrWhiteSpace(cvCompanyName) || candidates.Count == 0)
|
||||||
@@ -76,16 +102,23 @@ public sealed class AICompanyNameMatcherService : ICompanyNameMatcherService
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
_logger.LogDebug("Using AI to match '{CVCompany}' against {Count} candidates",
|
_logger.LogDebug("Using AI to match '{CVCompany}' against {Count} candidates (industry: {Industry})",
|
||||||
cvCompanyName, candidates.Count);
|
cvCompanyName, candidates.Count, industryHint ?? "unknown");
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
// Format candidates with company number prominently displayed to prevent truncation
|
||||||
var candidatesText = string.Join("\n", candidates.Select((c, i) =>
|
var candidatesText = string.Join("\n", candidates.Select((c, i) =>
|
||||||
$"{i + 1}. {c.CompanyName} (Number: {c.CompanyNumber}, Status: {c.CompanyStatus ?? "Unknown"})"));
|
$"[{c.CompanyNumber}] {c.CompanyName} (Status: {c.CompanyStatus ?? "Unknown"})"));
|
||||||
|
|
||||||
|
// Add industry context if available
|
||||||
|
var industryContext = string.IsNullOrEmpty(industryHint)
|
||||||
|
? ""
|
||||||
|
: $"Industry Context: This is a well-known brand in {industryHint}. Reject candidates clearly in different industries.\n";
|
||||||
|
|
||||||
var prompt = MatchingPrompt
|
var prompt = MatchingPrompt
|
||||||
.Replace("{CV_COMPANY}", cvCompanyName)
|
.Replace("{CV_COMPANY}", cvCompanyName)
|
||||||
|
.Replace("{INDUSTRY_CONTEXT}", industryContext)
|
||||||
.Replace("{CANDIDATES}", candidatesText);
|
.Replace("{CANDIDATES}", candidatesText);
|
||||||
|
|
||||||
var messages = new List<Message>
|
var messages = new List<Message>
|
||||||
@@ -95,8 +128,8 @@ public sealed class AICompanyNameMatcherService : ICompanyNameMatcherService
|
|||||||
|
|
||||||
var parameters = new MessageParameters
|
var parameters = new MessageParameters
|
||||||
{
|
{
|
||||||
Model = "claude-sonnet-4-20250514",
|
Model = "claude-3-5-haiku-20241022",
|
||||||
MaxTokens = 1024,
|
MaxTokens = 512,
|
||||||
Messages = messages,
|
Messages = messages,
|
||||||
System = [new SystemMessage(SystemPrompt)]
|
System = [new SystemMessage(SystemPrompt)]
|
||||||
};
|
};
|
||||||
@@ -127,7 +160,8 @@ public sealed class AICompanyNameMatcherService : ICompanyNameMatcherService
|
|||||||
aiResponse.BestMatchCompanyNumber, aiResponse.ConfidenceScore, aiResponse.Reasoning);
|
aiResponse.BestMatchCompanyNumber, aiResponse.ConfidenceScore, aiResponse.Reasoning);
|
||||||
|
|
||||||
// Find the matched candidate
|
// Find the matched candidate
|
||||||
if (aiResponse.BestMatchCompanyNumber == "NONE" || aiResponse.ConfidenceScore < 50)
|
// Lower threshold to 30 - we have fuzzy validation as backup
|
||||||
|
if (aiResponse.BestMatchCompanyNumber == "NONE" || aiResponse.ConfidenceScore < 30)
|
||||||
{
|
{
|
||||||
return new SemanticMatchResult
|
return new SemanticMatchResult
|
||||||
{
|
{
|
||||||
@@ -142,10 +176,40 @@ public sealed class AICompanyNameMatcherService : ICompanyNameMatcherService
|
|||||||
var matchedCandidate = candidates.FirstOrDefault(c =>
|
var matchedCandidate = candidates.FirstOrDefault(c =>
|
||||||
c.CompanyNumber.Equals(aiResponse.BestMatchCompanyNumber, StringComparison.OrdinalIgnoreCase));
|
c.CompanyNumber.Equals(aiResponse.BestMatchCompanyNumber, StringComparison.OrdinalIgnoreCase));
|
||||||
|
|
||||||
|
// If exact match not found, try to find a candidate that starts with the returned number
|
||||||
|
// This handles cases where AI truncates "09052626" to "09" or similar
|
||||||
|
if (matchedCandidate is null && !string.IsNullOrWhiteSpace(aiResponse.BestMatchCompanyNumber)
|
||||||
|
&& aiResponse.BestMatchCompanyNumber != "NONE")
|
||||||
|
{
|
||||||
|
var partialMatch = candidates.FirstOrDefault(c =>
|
||||||
|
c.CompanyNumber.StartsWith(aiResponse.BestMatchCompanyNumber, StringComparison.OrdinalIgnoreCase));
|
||||||
|
|
||||||
|
if (partialMatch is not null)
|
||||||
|
{
|
||||||
|
_logger.LogDebug("AI returned partial company number '{Partial}', matched to full number '{Full}'",
|
||||||
|
aiResponse.BestMatchCompanyNumber, partialMatch.CompanyNumber);
|
||||||
|
matchedCandidate = partialMatch;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Try reverse - maybe AI returned a longer string that contains the actual number
|
||||||
|
var reverseMatch = candidates.FirstOrDefault(c =>
|
||||||
|
aiResponse.BestMatchCompanyNumber.Contains(c.CompanyNumber, StringComparison.OrdinalIgnoreCase));
|
||||||
|
|
||||||
|
if (reverseMatch is not null)
|
||||||
|
{
|
||||||
|
_logger.LogDebug("AI returned string containing company number '{Number}'",
|
||||||
|
reverseMatch.CompanyNumber);
|
||||||
|
matchedCandidate = reverseMatch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (matchedCandidate is null)
|
if (matchedCandidate is null)
|
||||||
{
|
{
|
||||||
_logger.LogWarning("AI returned company number {Number} not in candidates list",
|
_logger.LogWarning("AI returned company number '{Number}' not in candidates list. Candidates: {Candidates}",
|
||||||
aiResponse.BestMatchCompanyNumber);
|
aiResponse.BestMatchCompanyNumber,
|
||||||
|
string.Join(", ", candidates.Select(c => c.CompanyNumber)));
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -164,4 +228,360 @@ public sealed class AICompanyNameMatcherService : ICompanyNameMatcherService
|
|||||||
return null; // Fall back to fuzzy matching
|
return null; // Fall back to fuzzy matching
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Names that must always be treated as ONE employer and never split into parts.
/// </summary>
/// <remarks>
/// Besides brands whose name contains "&amp;" or "and" (e.g. "Marks &amp; Spencer"), the set also
/// lists well-known firms without a conjunction and a few department-style phrases.
/// Lookups are case-insensitive (<see cref="StringComparer.OrdinalIgnoreCase"/>).
/// </remarks>
private static readonly HashSet<string> KnownSingleCompanyNames = new(StringComparer.OrdinalIgnoreCase)
{
    // Big 4 / Professional Services
    "Ernst & Young", "Ernst and Young", "EY",
    "Deloitte and Touche", "Deloitte & Touche",
    "PricewaterhouseCoopers", "Price Waterhouse",
    "KPMG",
    "Accenture",

    // Retail
    "Marks & Spencer", "Marks and Spencer", "M&S",
    "Fortnum & Mason", "Fortnum and Mason",
    "Crabtree & Evelyn",
    "Holland & Barrett", "Holland and Barrett",
    "Past Times & Present",
    "Barnes & Noble",
    "Abercrombie & Fitch",
    "Dolce & Gabbana",
    "Bang & Olufsen",
    "Crate & Barrel",
    "Bed Bath & Beyond",
    "Bath & Body Works",

    // Consumer Goods
    "Procter & Gamble", "Procter and Gamble", "P&G",
    "Johnson & Johnson", "Johnson and Johnson", "J&J",
    "Reckitt & Colman", "Reckitt and Colman",
    "Colgate-Palmolive",
    "Unilever",
    "Henkel",

    // Food & Beverage
    // ("Fortnum and Mason" is already listed under Retail; the duplicate entry was removed.)
    "Prêt A Manger", "Pret A Manger",
    "Lyle & Scott",
    "Ben & Jerry's", "Ben and Jerry's",
    "Baskin & Robbins",
    "Haribo",

    // Finance & Insurance
    "Standard & Poor's", "Standard and Poor's", "S&P",
    "Moody's",
    "Fitch Ratings",
    "Lloyd's of London",
    "Coutts & Co", "Coutts and Co",
    "Brown Shipley & Co",
    "Schroders",

    // Law Firms (common patterns)
    "Allen & Overy", "Allen and Overy",
    "Clifford Chance",
    "Freshfields Bruckhaus Deringer",
    "Linklaters",
    "Slaughter and May", "Slaughter & May",
    "Herbert Smith Freehills",
    "Hogan Lovells",
    "Norton Rose Fulbright",
    "DLA Piper",
    "Baker & McKenzie", "Baker McKenzie",
    "Eversheds Sutherland",
    "Ashurst",
    "CMS",
    "Simmons & Simmons",
    "Travers Smith",
    "Macfarlanes",
    "Addleshaw Goddard",
    "Pinsent Masons",
    "Shoosmiths",
    "Irwin Mitchell",
    "DAC Beachcroft",
    "Weightmans",
    "Browne Jacobson",
    "Mills & Reeve", "Mills and Reeve",
    "Taylor Wessing",
    "Osborne Clarke",
    "Bird & Bird", "Bird and Bird",
    "Withers",
    "Charles Russell Speechlys",
    "Stephenson Harwood",
    "Watson Farley & Williams",
    "Clyde & Co", "Clyde and Co",
    "Reed Smith",
    "Kennedys",
    "Fieldfisher",
    "RPC",
    "Womble Bond Dickinson",
    "Burges Salmon",
    "Trowers & Hamlins", "Trowers and Hamlins",
    "Bevan Brittan",
    "Veale Wasbrough Vizards",

    // Media & Entertainment
    "Simon & Schuster",
    "Warner Bros", "Warner Brothers",
    "William Morris Endeavor",
    "Creative Artists Agency",

    // Automotive
    "Rolls-Royce",
    "Aston Martin",
    "Jaguar Land Rover",

    // Pharmaceuticals
    "GlaxoSmithKline", "GSK",
    "AstraZeneca",
    "Smith & Nephew",
    "Roche",

    // Engineering & Construction
    "Mott MacDonald",
    "Arup",
    "Laing O'Rourke",
    "Kier",
    "Balfour Beatty",
    "Taylor Wimpey",
    "Persimmon",
    "Bellway",
    "Berkeley",

    // Technology
    "Hewlett-Packard", "HP",
    "Texas Instruments",
    "AT&T",
    "T-Mobile",

    // Other
    "Young & Co", "Young and Co",
    "Smith & Williamson",
    "Grant Thornton",
    "BDO",
    "RSM",
    "Mazars",
    "Moore Kingston Smith",
    "Crowe",
    "PKF",
    "Saffery Champness",
    "Buzzacott",
    "HW Fisher",
    "Haysmacintyre",
    "Menzies",
    "MHA",
    "Azets",
    "Dains",
    "Streets",
    "Armstrong Watson",

    // Common department/division patterns (not to be split)
    "Sales and Marketing",
    "Research and Development", "R&D",
    "Human Resources",
    "Finance and Operations",
    "Legal and Compliance",
    "IT and Digital",
    "Supply Chain and Logistics",
};
|
||||||
|
|
||||||
|
/// <summary>
/// Lower-case fragments which, when found inside a name, mark the text around the "and"
/// as divisions/regions of ONE company rather than two separate employers.
/// </summary>
/// <remarks>
/// Each fragment keeps its leading and trailing space so that it only matches whole words.
/// NOTE(review): " plc and ", " ltd and " and " limited and " also cover inputs such as
/// "Foo Ltd and Bar Ltd", which look like two legal entities - confirm this is intended.
/// </remarks>
private static readonly string[] SingleCompanyPatterns =
[
    // Business-unit wording
    " stores and ",      // e.g. "Tesco Stores and Distribution"
    " retail and ",      // e.g. "Next Retail and Online"

    // Geographic wording
    " uk and ",          // e.g. "BMW UK and Ireland"
    " europe and ",      // e.g. "Google Europe and Middle East"

    // Organisational wording
    " division and ",
    " department and ",
    " services and ",
    " group and ",

    // Legal-suffix wording
    " plc and ",
    " ltd and ",
    " limited and ",
];
|
||||||
|
|
||||||
|
/// <summary>
/// Decides whether <paramref name="companyName"/> names several employers and, if so, returns them.
/// </summary>
/// <remarks>
/// The work is purely rule-based and synchronous (no AI call is made), so the returned task is
/// already completed. <paramref name="cancellationToken"/> is accepted for interface
/// compatibility and is not observed.
/// </remarks>
/// <param name="companyName">Employer text as written on the CV.</param>
/// <param name="cancellationToken">Unused.</param>
/// <returns>
/// <c>null</c> for blank input or a single company; otherwise the list of detected company names.
/// </returns>
public Task<List<string>?> ExtractCompanyNamesAsync(
    string companyName,
    CancellationToken cancellationToken = default)
{
    List<string>? parts = null;

    if (!string.IsNullOrWhiteSpace(companyName))
    {
        _logger.LogDebug("Checking if '{CompanyName}' is a compound name (rule-based)", companyName);

        parts = DetectCompoundName(companyName);

        if (parts is not null)
        {
            _logger.LogDebug("'{CompanyName}' detected as compound, parts: [{Parts}]",
                companyName, string.Join(", ", parts));
        }
        else
        {
            _logger.LogDebug("'{CompanyName}' is a single company", companyName);
        }
    }

    return Task.FromResult(parts);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Rule-based detection of compound company names.
/// </summary>
/// <remarks>
/// Rules are applied in order:
/// <list type="number">
/// <item>Known single-company names and division-style patterns are never split.</item>
/// <item>A "/" splits the text when at least two parts of two or more characters remain.</item>
/// <item>The first " &amp; " / " and " splits the text when neither side is very short, the
/// right-hand side is not a department/role descriptor, and both sides look like company names.</item>
/// </list>
/// </remarks>
/// <param name="name">Raw employer text; surrounding whitespace is ignored.</param>
/// <returns>The separate company names, or <c>null</c> when the text is treated as one company.</returns>
private List<string>? DetectCompoundName(string name)
{
    var trimmedName = name.Trim();

    // Checks 1 & 2: known single company, or a departments/divisions pattern.
    // This guard runs once up front; every later rule can rely on it having passed.
    if (IsKnownSingleCompany(trimmedName) || MatchesSingleCompanyPattern(trimmedName))
    {
        return null;
    }

    // Check 3: "/" is a strong indicator of multiple companies
    if (trimmedName.Contains('/'))
    {
        var slashParts = trimmedName
            .Split('/')
            .Select(p => p.Trim())
            .Where(p => p.Length >= 2)   // drop empty / single-character fragments
            .ToList();

        if (slashParts.Count >= 2)
        {
            return slashParts;
        }
    }

    // Check 4: " & " or " and " between what look like separate company names.
    // The lazy first group means the split happens at the FIRST conjunction.
    var andMatch = System.Text.RegularExpressions.Regex.Match(
        trimmedName,
        @"^(.+?)\s+(?:&|and)\s+(.+)$",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);

    if (!andMatch.Success)
    {
        // Default: treat as single company
        return null;
    }

    var part1 = andMatch.Groups[1].Value.Trim();
    var part2 = andMatch.Groups[2].Value.Trim();

    // If either part is very short (like initials), probably not a split
    if (part1.Length < 3 || part2.Length < 3)
    {
        return null;
    }

    // If part2 looks like a department/role descriptor, don't split
    if (IsDepartmentOrRole(part2))
    {
        return null;
    }

    // Only split when both halves look like independent company names
    if (LooksLikeCompanyName(part1) && LooksLikeCompanyName(part2))
    {
        return [part1, part2];
    }

    // Default: treat as single company
    return null;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Tells whether <paramref name="name"/> is, or contains as whole words, one of the
/// entries in <c>KnownSingleCompanyNames</c>.
/// </summary>
/// <remarks>
/// Containment is checked on word boundaries: the characters immediately before and after
/// the hit must not be letters or digits. A plain substring test would let short entries
/// such as "EY" or "HP" match inside unrelated names (e.g. "Disney"), which wrongly stops
/// genuine compound names like "Disney/Pixar" from being split.
/// </remarks>
/// <param name="name">Employer text to test (case-insensitive).</param>
/// <returns><c>true</c> when the text should be kept as a single company.</returns>
private static bool IsKnownSingleCompany(string name)
{
    // Direct match (O(1) hash lookup)
    if (KnownSingleCompanyNames.Contains(name))
    {
        return true;
    }

    // Otherwise look for a known name embedded as complete words, e.g. "EY UK" or "KPMG LLP"
    foreach (var known in KnownSingleCompanyNames)
    {
        if (ContainsWholePhrase(name, known))
        {
            return true;
        }
    }

    return false;

    // True when needle occurs in haystack delimited by non-alphanumeric characters (or the string ends).
    static bool ContainsWholePhrase(string haystack, string needle)
    {
        var index = haystack.IndexOf(needle, StringComparison.OrdinalIgnoreCase);
        while (index >= 0)
        {
            var end = index + needle.Length;
            var startsOnBoundary = index == 0 || !char.IsLetterOrDigit(haystack[index - 1]);
            var endsOnBoundary = end == haystack.Length || !char.IsLetterOrDigit(haystack[end]);

            if (startsOnBoundary && endsOnBoundary)
            {
                return true;
            }

            index = haystack.IndexOf(needle, index + 1, StringComparison.OrdinalIgnoreCase);
        }

        return false;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Checks whether the name contains any of the <c>SingleCompanyPatterns</c> fragments,
/// i.e. wording that indicates divisions or regions of one company.
/// </summary>
/// <param name="name">Employer text; compared after invariant lower-casing.</param>
/// <returns><c>true</c> on the first fragment found, otherwise <c>false</c>.</returns>
private static bool MatchesSingleCompanyPattern(string name)
{
    var candidate = name.ToLowerInvariant();

    foreach (var fragment in SingleCompanyPatterns)
    {
        if (candidate.Contains(fragment))
        {
            return true;
        }
    }

    return false;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Tells whether <paramref name="text"/> reads like a department, function or role
/// descriptor (e.g. "Marketing", "IT", "Human Resources") rather than a company name.
/// </summary>
/// <remarks>
/// Keywords are matched case-insensitively as whole words or phrases. Plain substring
/// matching is deliberately avoided: short keywords such as "it", "hr" or "unit" would
/// otherwise fire inside ordinary names like "Smith", "Chrysler" or "United".
/// </remarks>
/// <param name="text">The right-hand part of a name that was split on "&amp;"/"and".</param>
/// <returns><c>true</c> when at least one department keyword occurs as a whole word.</returns>
private static bool IsDepartmentOrRole(string text)
{
    string[] departmentKeywords =
    [
        "department", "division", "team", "group", "unit",
        "services", "solutions", "operations", "logistics",
        "distribution", "manufacturing", "production",
        "marketing", "sales", "finance", "accounting",
        "hr", "human resources", "it", "technology",
        "research", "development", "r&d", "engineering",
        "retail", "wholesale", "stores", "online",
        "consulting", "advisory", "support"
    ];

    foreach (var keyword in departmentKeywords)
    {
        if (HasWholeWord(text, keyword))
        {
            return true;
        }
    }

    return false;

    // True when needle occurs in haystack with no letter/digit directly before or after it.
    static bool HasWholeWord(string haystack, string needle)
    {
        var index = haystack.IndexOf(needle, StringComparison.OrdinalIgnoreCase);
        while (index >= 0)
        {
            var end = index + needle.Length;
            var startsOnBoundary = index == 0 || !char.IsLetterOrDigit(haystack[index - 1]);
            var endsOnBoundary = end == haystack.Length || !char.IsLetterOrDigit(haystack[end]);

            if (startsOnBoundary && endsOnBoundary)
            {
                return true;
            }

            index = haystack.IndexOf(needle, index + 1, StringComparison.OrdinalIgnoreCase);
        }

        return false;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Cheap heuristic for "could this text stand alone as a company name?".
/// </summary>
/// <remarks>
/// A text qualifies when it is at least two characters long and either
/// (a) ends with, or contains after a space, a corporate suffix such as "Ltd" or "PLC", or
/// (b) is at least three characters long and starts with an upper-case letter.
/// Suffix checks use ordinal, case-insensitive comparison so the result does not depend
/// on the current culture.
/// </remarks>
/// <param name="text">One side of a name that was split on "&amp;"/"and".</param>
/// <returns><c>true</c> when the text plausibly names a company.</returns>
private static bool LooksLikeCompanyName(string text)
{
    if (text.Length < 2)
    {
        return false;
    }

    // A corporate suffix is taken as sufficient evidence on its own
    string[] companySuffixes = ["ltd", "limited", "plc", "inc", "corp", "llp", "llc", "group", "holdings"];

    foreach (var suffix in companySuffixes)
    {
        if (text.EndsWith(suffix, StringComparison.OrdinalIgnoreCase) ||
            text.Contains($" {suffix}", StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    // Otherwise: starts with a capital and has a reasonable length
    return text.Length >= 3 && char.IsUpper(text[0]);
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -171,8 +171,8 @@ public sealed class CVParserService : ICVParserService
|
|||||||
|
|
||||||
var parameters = new MessageParameters
|
var parameters = new MessageParameters
|
||||||
{
|
{
|
||||||
Model = "claude-sonnet-4-20250514",
|
Model = "claude-3-5-haiku-20241022",
|
||||||
MaxTokens = 4096,
|
MaxTokens = 2048,
|
||||||
Messages = messages,
|
Messages = messages,
|
||||||
System = [new SystemMessage(SystemPrompt)]
|
System = [new SystemMessage(SystemPrompt)]
|
||||||
};
|
};
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -120,6 +120,15 @@ public sealed class TimelineAnalyserService : ITimelineAnalyserService
|
|||||||
var earlier = sortedEmployment[i];
|
var earlier = sortedEmployment[i];
|
||||||
var later = sortedEmployment[j];
|
var later = sortedEmployment[j];
|
||||||
|
|
||||||
|
// Skip overlaps at the same company (internal promotions/transfers)
|
||||||
|
if (IsSameCompany(earlier.CompanyName, later.CompanyName))
|
||||||
|
{
|
||||||
|
_logger.LogDebug(
|
||||||
|
"Ignoring overlap at same company: {Company1} -> {Company2}",
|
||||||
|
earlier.CompanyName, later.CompanyName);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
var overlap = CalculateOverlap(earlier, later);
|
var overlap = CalculateOverlap(earlier, later);
|
||||||
|
|
||||||
if (overlap is not null && overlap.Value.Months > AllowedOverlapMonths)
|
if (overlap is not null && overlap.Value.Months > AllowedOverlapMonths)
|
||||||
@@ -143,6 +152,59 @@ public sealed class TimelineAnalyserService : ITimelineAnalyserService
|
|||||||
return overlaps;
|
return overlaps;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Determines whether two claimed employer names refer to the same company.
/// Handles variations like "BMW" vs "BMW UK" vs "BMW Group".
/// </summary>
/// <remarks>
/// Both names are normalised with <see cref="NormalizeCompanyName"/> and then
/// compared case-insensitively. When they are not equal, one name may still be
/// a leading part of the other (for example "BMW" and "BMW Manufacturing"),
/// but only at a word boundary and only when both names have at least three
/// characters, so "Arm" is not treated as "Armstrong Engineering".
/// </remarks>
/// <param name="company1">First company name; may be null or blank.</param>
/// <param name="company2">Second company name; may be null or blank.</param>
/// <returns>
/// <c>true</c> when the names are considered the same company; <c>false</c>
/// otherwise, including when either name is null or whitespace.
/// </returns>
private static bool IsSameCompany(string? company1, string? company2)
{
    if (string.IsNullOrWhiteSpace(company1) || string.IsNullOrWhiteSpace(company2))
    {
        return false;
    }

    var name1 = NormalizeCompanyName(company1);
    var name2 = NormalizeCompanyName(company2);

    // Exact match after normalisation.
    if (name1.Equals(name2, StringComparison.OrdinalIgnoreCase))
    {
        return true;
    }

    // Very short names are too ambiguous for prefix matching.
    if (name1.Length < 3 || name2.Length < 3)
    {
        return false;
    }

    return IsWordPrefix(name1, name2) || IsWordPrefix(name2, name1);

    // True when 'full' starts with 'prefix' and the prefix ends on a word
    // boundary (the next character is not a letter or digit).
    static bool IsWordPrefix(string prefix, string full) =>
        full.Length > prefix.Length
        && full.StartsWith(prefix, StringComparison.OrdinalIgnoreCase)
        && !char.IsLetterOrDigit(full[prefix.Length]);
}

/// <summary>
/// Trims the name and strips trailing legal-form / regional suffixes
/// ("Ltd", "PLC", "UK", "Group", ...) so that name variants compare equal.
/// </summary>
/// <remarks>
/// Stripping repeats until no suffix matches, so the result does not depend on
/// the order in which stacked suffixes appear ("Acme Ltd UK" and "Acme UK Ltd"
/// both become "Acme"). A suffix is only removed when preceded by a space, so
/// a name consisting solely of a suffix is returned unchanged.
/// </remarks>
/// <param name="name">Company name to normalise; must not be null.</param>
/// <returns>The trimmed name with all trailing suffixes removed.</returns>
private static string NormalizeCompanyName(string name)
{
    // Matching is case-insensitive, so one casing per suffix is sufficient.
    string[] suffixes = ["Ltd", "Ltd.", "Limited", "PLC", "Inc", "Inc.",
        "Corporation", "Corp", "Corp.", "UK", "Group", "(UK)", "& Co", "& Co."];

    var normalized = name.Trim();

    bool stripped;
    do
    {
        stripped = false;

        foreach (var suffix in suffixes)
        {
            if (normalized.EndsWith(" " + suffix, StringComparison.OrdinalIgnoreCase))
            {
                // Drop the suffix together with its separating space.
                normalized = normalized[..^(suffix.Length + 1)].Trim();
                stripped = true;
            }
        }
    }
    while (stripped); // Each pass shortens the string, so this terminates.

    return normalized;
}
|
||||||
|
|
||||||
private static (DateOnly Start, DateOnly End, int Months)? CalculateOverlap(
|
private static (DateOnly Start, DateOnly End, int Months)? CalculateOverlap(
|
||||||
EmploymentEntry earlier,
|
EmploymentEntry earlier,
|
||||||
EmploymentEntry later)
|
EmploymentEntry later)
|
||||||
|
|||||||
13
src/RealCV.Web/Components/Layout/AuthLayout.razor
Normal file
13
src/RealCV.Web/Components/Layout/AuthLayout.razor
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
@inherits LayoutComponentBase
|
||||||
|
|
||||||
|
<div class="d-flex flex-column min-vh-100">
|
||||||
|
<main class="flex-grow-1">
|
||||||
|
@Body
|
||||||
|
</main>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="blazor-error-ui" class="alert alert-danger fixed-bottom m-3" style="display: none;">
|
||||||
|
An unhandled error has occurred.
|
||||||
|
<a href="" class="alert-link reload">Reload</a>
|
||||||
|
<button type="button" class="btn-close float-end dismiss" aria-label="Close"></button>
|
||||||
|
</div>
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
@page "/account/login"
|
@page "/account/login"
|
||||||
@using RealCV.Web.Components.Layout
|
@using RealCV.Web.Components.Layout
|
||||||
@layout MainLayout
|
@layout AuthLayout
|
||||||
|
|
||||||
@using Microsoft.AspNetCore.Identity
|
@using Microsoft.AspNetCore.Identity
|
||||||
@using RealCV.Infrastructure.Identity
|
@using RealCV.Infrastructure.Identity
|
||||||
@@ -14,9 +14,9 @@
|
|||||||
<!-- Left side - Form -->
|
<!-- Left side - Form -->
|
||||||
<div class="auth-form-side">
|
<div class="auth-form-side">
|
||||||
<div class="auth-form-wrapper">
|
<div class="auth-form-wrapper">
|
||||||
<div class="text-center mb-4">
|
<div class="text-center mb-5">
|
||||||
<a href="/">
|
<a href="/">
|
||||||
<img src="images/RealCV_Logo_Transparent.png" alt="RealCV" class="auth-logo" />
|
<img src="images/RealCV_Logo_Transparent.png" alt="RealCV" class="auth-logo" style="height: 60px;" />
|
||||||
</a>
|
</a>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
@page "/account/register"
|
@page "/account/register"
|
||||||
@using RealCV.Web.Components.Layout
|
@using RealCV.Web.Components.Layout
|
||||||
@layout MainLayout
|
@layout AuthLayout
|
||||||
@rendermode InteractiveServer
|
@rendermode InteractiveServer
|
||||||
|
|
||||||
@using Microsoft.AspNetCore.Identity
|
@using Microsoft.AspNetCore.Identity
|
||||||
@@ -16,9 +16,9 @@
|
|||||||
<!-- Left side - Form -->
|
<!-- Left side - Form -->
|
||||||
<div class="auth-form-side">
|
<div class="auth-form-side">
|
||||||
<div class="auth-form-wrapper">
|
<div class="auth-form-wrapper">
|
||||||
<div class="text-center mb-4">
|
<div class="text-center mb-5">
|
||||||
<a href="/">
|
<a href="/">
|
||||||
<img src="images/RealCV_Logo_Transparent.png" alt="RealCV" class="auth-logo" />
|
<img src="images/RealCV_Logo_Transparent.png" alt="RealCV" class="auth-logo" style="height: 60px;" />
|
||||||
</a>
|
</a>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
@@ -151,23 +151,23 @@
|
|||||||
{
|
{
|
||||||
<div class="file-list-item">
|
<div class="file-list-item">
|
||||||
<div class="d-flex align-items-center">
|
<div class="d-flex align-items-center">
|
||||||
<div class="file-type-icon me-3 @(file.Name.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase) ? "pdf" : "docx")">
|
<div class="file-type-icon me-2 @(file.Name.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase) ? "pdf" : "docx")">
|
||||||
@if (file.Name.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase))
|
@if (file.Name.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase))
|
||||||
{
|
{
|
||||||
<svg xmlns="http://www.w3.org/2000/svg" width="18" height="18" fill="currentColor" viewBox="0 0 16 16">
|
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" viewBox="0 0 16 16">
|
||||||
<path d="M14 14V4.5L9.5 0H4a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h8a2 2 0 0 0 2-2zM9.5 3A1.5 1.5 0 0 0 11 4.5h2V14a1 1 0 0 1-1 1H4a1 1 0 0 1-1-1V2a1 1 0 0 1 1-1h5.5v2z"/>
|
<path d="M14 14V4.5L9.5 0H4a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h8a2 2 0 0 0 2-2zM9.5 3A1.5 1.5 0 0 0 11 4.5h2V14a1 1 0 0 1-1 1H4a1 1 0 0 1-1-1V2a1 1 0 0 1 1-1h5.5v2z"/>
|
||||||
</svg>
|
</svg>
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
<svg xmlns="http://www.w3.org/2000/svg" width="18" height="18" fill="currentColor" viewBox="0 0 16 16">
|
<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" fill="currentColor" viewBox="0 0 16 16">
|
||||||
<path d="M14 14V4.5L9.5 0H4a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h8a2 2 0 0 0 2-2zM9.5 3A1.5 1.5 0 0 0 11 4.5h2V14a1 1 0 0 1-1 1H4a1 1 0 0 1-1-1V2a1 1 0 0 1 1-1h5.5v2z"/>
|
<path d="M14 14V4.5L9.5 0H4a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h8a2 2 0 0 0 2-2zM9.5 3A1.5 1.5 0 0 0 11 4.5h2V14a1 1 0 0 1-1 1H4a1 1 0 0 1-1-1V2a1 1 0 0 1 1-1h5.5v2z"/>
|
||||||
</svg>
|
</svg>
|
||||||
}
|
}
|
||||||
</div>
|
</div>
|
||||||
<div class="flex-grow-1">
|
<div class="flex-grow-1 min-width-0">
|
||||||
<p class="mb-0 fw-medium">@file.Name</p>
|
<span class="file-name">@file.Name</span>
|
||||||
<small class="text-muted">@FormatFileSize(file.Size)</small>
|
<span class="file-size">@FormatFileSize(file.Size)</span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<button class="btn btn-sm btn-outline-danger" @onclick="() => RemoveFile(file)">
|
<button class="btn btn-sm btn-outline-danger" @onclick="() => RemoveFile(file)">
|
||||||
@@ -267,39 +267,71 @@
|
|||||||
user-select: none;
|
user-select: none;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.file-list {
|
||||||
|
border: 1px solid var(--realcv-gray-200);
|
||||||
|
border-radius: 8px;
|
||||||
|
overflow: hidden;
|
||||||
|
}
|
||||||
|
|
||||||
.file-list-item {
|
.file-list-item {
|
||||||
display: flex;
|
display: flex;
|
||||||
align-items: center;
|
align-items: center;
|
||||||
justify-content: space-between;
|
justify-content: space-between;
|
||||||
border: 1px solid var(--realcv-gray-200);
|
padding: 0.5rem 0.75rem;
|
||||||
border-radius: 12px;
|
|
||||||
padding: 1rem;
|
|
||||||
margin-bottom: 0.75rem;
|
|
||||||
background: var(--realcv-bg-surface);
|
background: var(--realcv-bg-surface);
|
||||||
transition: all 0.2s ease;
|
border-bottom: 1px solid var(--realcv-gray-200);
|
||||||
|
transition: background 0.15s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
.file-list-item:last-child {
|
||||||
|
border-bottom: none;
|
||||||
}
|
}
|
||||||
|
|
||||||
.file-list-item:hover {
|
.file-list-item:hover {
|
||||||
border-color: var(--realcv-primary);
|
background: var(--realcv-bg-muted);
|
||||||
box-shadow: 0 4px 12px rgba(59, 111, 212, 0.08);
|
}
|
||||||
|
|
||||||
|
.file-list-item:nth-child(even) {
|
||||||
|
background: rgba(0, 0, 0, 0.015);
|
||||||
|
}
|
||||||
|
|
||||||
|
.file-list-item:nth-child(even):hover {
|
||||||
|
background: var(--realcv-bg-muted);
|
||||||
|
}
|
||||||
|
|
||||||
|
.file-name {
|
||||||
|
font-size: 0.875rem;
|
||||||
|
font-weight: 500;
|
||||||
|
color: var(--realcv-text-primary);
|
||||||
|
overflow: hidden;
|
||||||
|
text-overflow: ellipsis;
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.file-size {
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: var(--realcv-gray-500);
|
||||||
|
margin-left: 0.5rem;
|
||||||
|
flex-shrink: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
.file-type-icon {
|
.file-type-icon {
|
||||||
width: 40px;
|
width: 28px;
|
||||||
height: 40px;
|
height: 28px;
|
||||||
border-radius: 10px;
|
border-radius: 4px;
|
||||||
display: flex;
|
display: flex;
|
||||||
align-items: center;
|
align-items: center;
|
||||||
justify-content: center;
|
justify-content: center;
|
||||||
|
flex-shrink: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
.file-type-icon.pdf {
|
.file-type-icon.pdf {
|
||||||
background: linear-gradient(135deg, #fde8e8 0%, #fcd9d9 100%);
|
background: #fef2f2;
|
||||||
color: #dc2626;
|
color: #dc2626;
|
||||||
}
|
}
|
||||||
|
|
||||||
.file-type-icon.docx {
|
.file-type-icon.docx {
|
||||||
background: linear-gradient(135deg, #e3ecf7 0%, #d4e4f4 100%);
|
background: #eff6ff;
|
||||||
color: var(--realcv-primary);
|
color: var(--realcv-primary);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -118,10 +118,10 @@
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
<!-- Stats Cards -->
|
<!-- Stats Cards -->
|
||||||
<div class="row mb-4 g-4">
|
<div class="row mb-3 g-3">
|
||||||
<div class="col-md-4">
|
<div class="col-md-4">
|
||||||
<div class="card border-0 shadow-sm stat-card h-100">
|
<div class="card border-0 shadow-sm stat-card h-100">
|
||||||
<div class="card-body p-4">
|
<div class="card-body p-3">
|
||||||
<div class="d-flex align-items-center">
|
<div class="d-flex align-items-center">
|
||||||
<div class="stat-icon stat-icon-primary me-3">
|
<div class="stat-icon stat-icon-primary me-3">
|
||||||
<svg xmlns="http://www.w3.org/2000/svg" width="28" height="28" fill="currentColor" viewBox="0 0 16 16">
|
<svg xmlns="http://www.w3.org/2000/svg" width="28" height="28" fill="currentColor" viewBox="0 0 16 16">
|
||||||
@@ -139,7 +139,7 @@
|
|||||||
</div>
|
</div>
|
||||||
<div class="col-md-4">
|
<div class="col-md-4">
|
||||||
<div class="card border-0 shadow-sm stat-card h-100">
|
<div class="card border-0 shadow-sm stat-card h-100">
|
||||||
<div class="card-body p-4">
|
<div class="card-body p-3">
|
||||||
<div class="d-flex align-items-center">
|
<div class="d-flex align-items-center">
|
||||||
<div class="stat-icon stat-icon-success me-3">
|
<div class="stat-icon stat-icon-success me-3">
|
||||||
<svg xmlns="http://www.w3.org/2000/svg" width="28" height="28" fill="currentColor" viewBox="0 0 16 16">
|
<svg xmlns="http://www.w3.org/2000/svg" width="28" height="28" fill="currentColor" viewBox="0 0 16 16">
|
||||||
@@ -157,7 +157,7 @@
|
|||||||
</div>
|
</div>
|
||||||
<div class="col-md-4">
|
<div class="col-md-4">
|
||||||
<div class="card border-0 shadow-sm stat-card h-100">
|
<div class="card border-0 shadow-sm stat-card h-100">
|
||||||
<div class="card-body p-4">
|
<div class="card-body p-3">
|
||||||
<div class="d-flex align-items-center">
|
<div class="d-flex align-items-center">
|
||||||
<div class="stat-icon stat-icon-warning me-3">
|
<div class="stat-icon stat-icon-warning me-3">
|
||||||
<svg xmlns="http://www.w3.org/2000/svg" width="28" height="28" fill="currentColor" viewBox="0 0 16 16">
|
<svg xmlns="http://www.w3.org/2000/svg" width="28" height="28" fill="currentColor" viewBox="0 0 16 16">
|
||||||
@@ -176,7 +176,7 @@
|
|||||||
|
|
||||||
<!-- Checks List -->
|
<!-- Checks List -->
|
||||||
<div class="card border-0 shadow-sm">
|
<div class="card border-0 shadow-sm">
|
||||||
<div class="card-header py-3 border-bottom" style="background-color: var(--realcv-bg-surface);">
|
<div class="card-header py-2 px-3 border-bottom" style="background-color: var(--realcv-bg-surface);">
|
||||||
<div class="d-flex justify-content-between align-items-center">
|
<div class="d-flex justify-content-between align-items-center">
|
||||||
<div class="d-flex align-items-center gap-3">
|
<div class="d-flex align-items-center gap-3">
|
||||||
<h5 class="mb-0 fw-bold">Recent CV Checks</h5>
|
<h5 class="mb-0 fw-bold">Recent CV Checks</h5>
|
||||||
@@ -203,17 +203,17 @@
|
|||||||
<table class="table table-hover align-middle mb-0">
|
<table class="table table-hover align-middle mb-0">
|
||||||
<thead>
|
<thead>
|
||||||
<tr style="background-color: var(--realcv-bg-muted);">
|
<tr style="background-color: var(--realcv-bg-muted);">
|
||||||
<th class="border-0 ps-3 py-3" style="width: 40px;">
|
<th class="border-0 ps-3 py-2" style="width: 40px;">
|
||||||
<input type="checkbox" class="form-check-input"
|
<input type="checkbox" class="form-check-input"
|
||||||
checked="@IsAllSelected()"
|
checked="@IsAllSelected()"
|
||||||
@onchange="ToggleSelectAll"
|
@onchange="ToggleSelectAll"
|
||||||
title="Select all" />
|
title="Select all" />
|
||||||
</th>
|
</th>
|
||||||
<th class="border-0 py-3 text-uppercase small fw-semibold text-muted" style="letter-spacing: 0.05em;">Candidate</th>
|
<th class="border-0 py-2 text-uppercase small fw-semibold text-muted" style="letter-spacing: 0.05em;">Candidate</th>
|
||||||
<th class="border-0 py-3 text-uppercase small fw-semibold text-muted" style="letter-spacing: 0.05em;">Uploaded</th>
|
<th class="border-0 py-2 text-uppercase small fw-semibold text-muted" style="letter-spacing: 0.05em;">Uploaded</th>
|
||||||
<th class="border-0 py-3 text-uppercase small fw-semibold text-muted text-center" style="letter-spacing: 0.05em;">Status</th>
|
<th class="border-0 py-2 text-uppercase small fw-semibold text-muted text-center" style="letter-spacing: 0.05em;">Status</th>
|
||||||
<th class="border-0 py-3 text-uppercase small fw-semibold text-muted text-center" style="letter-spacing: 0.05em;">Score</th>
|
<th class="border-0 py-2 text-uppercase small fw-semibold text-muted text-center" style="letter-spacing: 0.05em;">Score</th>
|
||||||
<th class="border-0 py-3 pe-4 text-uppercase small fw-semibold text-muted text-end" style="letter-spacing: 0.05em;">Actions</th>
|
<th class="border-0 py-2 pe-4 text-uppercase small fw-semibold text-muted text-end" style="letter-spacing: 0.05em;">Actions</th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
@@ -221,15 +221,15 @@
|
|||||||
{
|
{
|
||||||
<tr class="@(check.Status == "Completed" ? "cursor-pointer" : "") @(_selectedIds.Contains(check.Id) ? "table-active" : "")"
|
<tr class="@(check.Status == "Completed" ? "cursor-pointer" : "") @(_selectedIds.Contains(check.Id) ? "table-active" : "")"
|
||||||
@onclick="() => ViewReport(check)">
|
@onclick="() => ViewReport(check)">
|
||||||
<td class="ps-3 py-3" @onclick:stopPropagation="true">
|
<td class="ps-3 py-2" @onclick:stopPropagation="true">
|
||||||
<input type="checkbox" class="form-check-input"
|
<input type="checkbox" class="form-check-input"
|
||||||
checked="@_selectedIds.Contains(check.Id)"
|
checked="@_selectedIds.Contains(check.Id)"
|
||||||
@onchange="() => ToggleSelection(check.Id)" />
|
@onchange="() => ToggleSelection(check.Id)" />
|
||||||
</td>
|
</td>
|
||||||
<td class="py-3">
|
<td class="py-2">
|
||||||
<div class="d-flex align-items-center">
|
<div class="d-flex align-items-center">
|
||||||
<div class="file-icon-wrapper me-3">
|
<div class="file-icon-wrapper me-2">
|
||||||
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" fill="currentColor" class="bi bi-file-earmark-person text-primary" viewBox="0 0 16 16">
|
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="currentColor" class="bi bi-file-earmark-person text-primary" viewBox="0 0 16 16">
|
||||||
<path d="M11 8a3 3 0 1 1-6 0 3 3 0 0 1 6 0z"/>
|
<path d="M11 8a3 3 0 1 1-6 0 3 3 0 0 1 6 0z"/>
|
||||||
<path d="M14 14V4.5L9.5 0H4a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h8a2 2 0 0 0 2-2zM9.5 3A1.5 1.5 0 0 0 11 4.5h2v9.255S12 12 8 12s-5 1.755-5 1.755V2a1 1 0 0 1 1-1h5.5v2z"/>
|
<path d="M14 14V4.5L9.5 0H4a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h8a2 2 0 0 0 2-2zM9.5 3A1.5 1.5 0 0 0 11 4.5h2v9.255S12 12 8 12s-5 1.755-5 1.755V2a1 1 0 0 1 1-1h5.5v2z"/>
|
||||||
</svg>
|
</svg>
|
||||||
@@ -240,17 +240,17 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</td>
|
</td>
|
||||||
<td class="py-3">
|
<td class="py-2">
|
||||||
<div>
|
<div>
|
||||||
<p class="mb-0 small">@check.CreatedAt.ToString("dd MMM yyyy")</p>
|
<p class="mb-0 small">@check.CreatedAt.ToString("dd MMM yyyy")</p>
|
||||||
<small class="text-muted">@check.CreatedAt.ToString("HH:mm")</small>
|
<small class="text-muted">@check.CreatedAt.ToString("HH:mm")</small>
|
||||||
</div>
|
</div>
|
||||||
</td>
|
</td>
|
||||||
<td class="py-3 text-center">
|
<td class="py-2 text-center">
|
||||||
@switch (check.Status)
|
@switch (check.Status)
|
||||||
{
|
{
|
||||||
case "Completed":
|
case "Completed":
|
||||||
<span class="badge rounded-pill bg-success-subtle text-success px-3 py-2">
|
<span class="badge rounded-pill bg-success-subtle text-success px-2 py-1">
|
||||||
<svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" fill="currentColor" class="bi bi-check-circle-fill me-1" viewBox="0 0 16 16">
|
<svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" fill="currentColor" class="bi bi-check-circle-fill me-1" viewBox="0 0 16 16">
|
||||||
<path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zm-3.97-3.03a.75.75 0 0 0-1.08.022L7.477 9.417 5.384 7.323a.75.75 0 0 0-1.06 1.06L6.97 11.03a.75.75 0 0 0 1.079-.02l3.992-4.99a.75.75 0 0 0-.01-1.05z"/>
|
<path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zm-3.97-3.03a.75.75 0 0 0-1.08.022L7.477 9.417 5.384 7.323a.75.75 0 0 0-1.06 1.06L6.97 11.03a.75.75 0 0 0 1.079-.02l3.992-4.99a.75.75 0 0 0-.01-1.05z"/>
|
||||||
</svg>
|
</svg>
|
||||||
@@ -258,13 +258,13 @@
|
|||||||
</span>
|
</span>
|
||||||
break;
|
break;
|
||||||
case "Processing":
|
case "Processing":
|
||||||
<span class="badge rounded-pill bg-primary-subtle text-primary px-3 py-2">
|
<span class="badge rounded-pill bg-primary-subtle text-primary px-2 py-1">
|
||||||
<span class="spinner-border spinner-border-sm me-1" role="status" style="width: 0.75rem; height: 0.75rem;"></span>
|
<span class="spinner-border spinner-border-sm me-1" role="status" style="width: 0.75rem; height: 0.75rem;"></span>
|
||||||
@(check.ProcessingStage ?? "Processing")
|
@(check.ProcessingStage ?? "Processing")
|
||||||
</span>
|
</span>
|
||||||
break;
|
break;
|
||||||
case "Pending":
|
case "Pending":
|
||||||
<span class="badge rounded-pill bg-secondary-subtle text-secondary px-3 py-2">
|
<span class="badge rounded-pill bg-secondary-subtle text-secondary px-2 py-1">
|
||||||
<svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" fill="currentColor" class="bi bi-clock me-1" viewBox="0 0 16 16">
|
<svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" fill="currentColor" class="bi bi-clock me-1" viewBox="0 0 16 16">
|
||||||
<path d="M8 3.5a.5.5 0 0 0-1 0V9a.5.5 0 0 0 .252.434l3.5 2a.5.5 0 0 0 .496-.868L8 8.71V3.5z"/>
|
<path d="M8 3.5a.5.5 0 0 0-1 0V9a.5.5 0 0 0 .252.434l3.5 2a.5.5 0 0 0 .496-.868L8 8.71V3.5z"/>
|
||||||
<path d="M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16zm7-8A7 7 0 1 1 1 8a7 7 0 0 1 14 0z"/>
|
<path d="M8 16A8 8 0 1 0 8 0a8 8 0 0 0 0 16zm7-8A7 7 0 1 1 1 8a7 7 0 0 1 14 0z"/>
|
||||||
@@ -273,7 +273,7 @@
|
|||||||
</span>
|
</span>
|
||||||
break;
|
break;
|
||||||
case "Failed":
|
case "Failed":
|
||||||
<span class="badge rounded-pill bg-danger-subtle text-danger px-3 py-2">
|
<span class="badge rounded-pill bg-danger-subtle text-danger px-2 py-1">
|
||||||
<svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" fill="currentColor" class="bi bi-x-circle-fill me-1" viewBox="0 0 16 16">
|
<svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" fill="currentColor" class="bi bi-x-circle-fill me-1" viewBox="0 0 16 16">
|
||||||
<path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zM5.354 4.646a.5.5 0 1 0-.708.708L7.293 8l-2.647 2.646a.5.5 0 0 0 .708.708L8 8.707l2.646 2.647a.5.5 0 0 0 .708-.708L8.707 8l2.647-2.646a.5.5 0 0 0-.708-.708L8 7.293 5.354 4.646z"/>
|
<path d="M16 8A8 8 0 1 1 0 8a8 8 0 0 1 16 0zM5.354 4.646a.5.5 0 1 0-.708.708L7.293 8l-2.647 2.646a.5.5 0 0 0 .708.708L8 8.707l2.646 2.647a.5.5 0 0 0 .708-.708L8.707 8l2.647-2.646a.5.5 0 0 0-.708-.708L8 7.293 5.354 4.646z"/>
|
||||||
</svg>
|
</svg>
|
||||||
@@ -281,11 +281,11 @@
|
|||||||
</span>
|
</span>
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
<span class="badge rounded-pill bg-secondary-subtle text-secondary px-3 py-2">@check.Status</span>
|
<span class="badge rounded-pill bg-secondary-subtle text-secondary px-2 py-1">@check.Status</span>
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
</td>
|
</td>
|
||||||
<td class="py-3 text-center">
|
<td class="py-2 text-center">
|
||||||
@if (check.VeracityScore.HasValue)
|
@if (check.VeracityScore.HasValue)
|
||||||
{
|
{
|
||||||
<div class="score-ring-container" title="Veracity Score: @check.VeracityScore%">
|
<div class="score-ring-container" title="Veracity Score: @check.VeracityScore%">
|
||||||
@@ -303,7 +303,7 @@
|
|||||||
<span class="text-muted">--</span>
|
<span class="text-muted">--</span>
|
||||||
}
|
}
|
||||||
</td>
|
</td>
|
||||||
<td class="py-3 pe-4 text-end">
|
<td class="py-2 pe-4 text-end">
|
||||||
<div class="d-flex justify-content-end align-items-center gap-2">
|
<div class="d-flex justify-content-end align-items-center gap-2">
|
||||||
@if (check.Status == "Completed")
|
@if (check.Status == "Completed")
|
||||||
{
|
{
|
||||||
@@ -424,10 +424,10 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
.file-icon-wrapper {
|
.file-icon-wrapper {
|
||||||
width: 44px;
|
width: 36px;
|
||||||
height: 44px;
|
height: 36px;
|
||||||
background: linear-gradient(135deg, #e8f1fa 0%, #d4e4f4 100%);
|
background: linear-gradient(135deg, #e8f1fa 0%, #d4e4f4 100%);
|
||||||
border-radius: 10px;
|
border-radius: 8px;
|
||||||
display: flex;
|
display: flex;
|
||||||
align-items: center;
|
align-items: center;
|
||||||
justify-content: center;
|
justify-content: center;
|
||||||
@@ -435,8 +435,8 @@
|
|||||||
|
|
||||||
.score-ring-container {
|
.score-ring-container {
|
||||||
position: relative;
|
position: relative;
|
||||||
width: 52px;
|
width: 44px;
|
||||||
height: 52px;
|
height: 44px;
|
||||||
display: inline-flex;
|
display: inline-flex;
|
||||||
align-items: center;
|
align-items: center;
|
||||||
justify-content: center;
|
justify-content: center;
|
||||||
|
|||||||
@@ -74,7 +74,7 @@
|
|||||||
<path d="M7.002 12a1 1 0 1 1 2 0 1 1 0 0 1-2 0zM7.1 5.995a.905.905 0 1 1 1.8 0l-.35 3.507a.552.552 0 0 1-1.1 0L7.1 5.995z"/>
|
<path d="M7.002 12a1 1 0 1 1 2 0 1 1 0 0 1-2 0zM7.1 5.995a.905.905 0 1 1 1.8 0l-.35 3.507a.552.552 0 0 1-1.1 0L7.1 5.995z"/>
|
||||||
</svg>
|
</svg>
|
||||||
<h4 class="mb-2">Processing Failed</h4>
|
<h4 class="mb-2">Processing Failed</h4>
|
||||||
<p class="text-muted">We encountered an error processing your CV. Please try uploading again.</p>
|
<p class="text-muted">@(!string.IsNullOrEmpty(_check.ProcessingStage) ? _check.ProcessingStage : "We encountered an error processing your CV. Please try uploading again.")</p>
|
||||||
}
|
}
|
||||||
|
|
||||||
<p class="text-muted small mt-4">
|
<p class="text-muted small mt-4">
|
||||||
|
|||||||
@@ -1051,8 +1051,12 @@ h1:focus {
|
|||||||
}
|
}
|
||||||
|
|
||||||
.auth-logo {
|
.auth-logo {
|
||||||
height: 48px;
|
height: 60px;
|
||||||
margin-bottom: 1rem;
|
transition: opacity var(--realcv-transition);
|
||||||
|
}
|
||||||
|
|
||||||
|
.auth-logo:hover {
|
||||||
|
opacity: 0.85;
|
||||||
}
|
}
|
||||||
|
|
||||||
.auth-title {
|
.auth-title {
|
||||||
|
|||||||
319
tests/RealCV.Tests/Integration/CVBatchTester.cs
Normal file
319
tests/RealCV.Tests/Integration/CVBatchTester.cs
Normal file
@@ -0,0 +1,319 @@
|
|||||||
|
using System.Text.Json;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
using Microsoft.Extensions.Configuration;
|
||||||
|
using Microsoft.Extensions.DependencyInjection;
|
||||||
|
using Microsoft.Extensions.Logging;
|
||||||
|
using RealCV.Application.Interfaces;
|
||||||
|
using RealCV.Application.Models;
|
||||||
|
using RealCV.Infrastructure.Data;
|
||||||
|
using RealCV.Infrastructure.ExternalApis;
|
||||||
|
using RealCV.Infrastructure.Services;
|
||||||
|
using RealCV.Infrastructure.Configuration;
|
||||||
|
|
||||||
|
namespace RealCV.Tests.Integration;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Test utility to batch process CVs and output verification findings.
|
||||||
|
/// Run with: dotnet test --filter "FullyQualifiedName~CVBatchTester" -- TestRunParameters.Parameter(name=\"CvFolder\", value=\"/path/to/cvs\")
|
||||||
|
/// Or use the ProcessFolder method directly.
|
||||||
|
/// </summary>
|
||||||
|
public class CVBatchTester
|
||||||
|
{
|
||||||
|
private readonly IServiceProvider _serviceProvider;
|
||||||
|
|
||||||
|
/// <summary>
/// Creates the tester and builds the service provider from the registrations
/// made in <see cref="ConfigureServices"/>.
/// </summary>
public CVBatchTester()
{
    ServiceCollection registrations = new();
    ConfigureServices(registrations);

    _serviceProvider = registrations.BuildServiceProvider();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Registers logging, the database context factory, the Companies House client,
/// the AI company-name matcher and the verification/parsing services.
/// </summary>
/// <remarks>
/// Configuration is read from <c>appsettings.json</c>,
/// <c>appsettings.Development.json</c> (both optional, resolved against the
/// current directory) and environment variables.
/// </remarks>
/// <param name="services">Collection that receives the registrations.</param>
/// <exception cref="InvalidOperationException">
/// No <c>DefaultConnection</c> connection string is configured.
/// </exception>
private static void ConfigureServices(IServiceCollection services)
{
    // Load configuration
    var configuration = new ConfigurationBuilder()
        .SetBasePath(Directory.GetCurrentDirectory())
        .AddJsonFile("appsettings.json", optional: true)
        .AddJsonFile("appsettings.Development.json", optional: true)
        .AddEnvironmentVariables()
        .Build();

    // Logging
    services.AddLogging(builder =>
    {
        builder.AddConsole();
        builder.SetMinimumLevel(LogLevel.Information);
    });

    // Database
    // Credentials must come from configuration: a connection string with an
    // embedded password must never be committed as a source-code fallback.
    var connectionString = configuration.GetConnectionString("DefaultConnection");
    if (string.IsNullOrWhiteSpace(connectionString))
    {
        throw new InvalidOperationException(
            "Connection string 'DefaultConnection' is not configured. Set it in appsettings.json, " +
            "appsettings.Development.json, or the ConnectionStrings__DefaultConnection environment variable.");
    }

    services.AddDbContextFactory<ApplicationDbContext>(options =>
        options.UseSqlServer(connectionString));

    // Companies House
    services.Configure<CompaniesHouseSettings>(configuration.GetSection("CompaniesHouse"));
    services.AddHttpClient<CompaniesHouseClient>();

    // Anthropic (for AI matching)
    services.Configure<AnthropicSettings>(configuration.GetSection("Anthropic"));
    services.AddScoped<ICompanyNameMatcherService, AICompanyNameMatcherService>();

    // Services
    services.AddScoped<ICompanyVerifierService, CompanyVerifierService>();
    services.AddScoped<IEducationVerifierService, EducationVerifierService>();
    services.AddScoped<ICVParserService, CVParserService>();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Verifies every CV (<c>.pdf</c>, <c>.docx</c>, <c>.doc</c>) found directly inside
/// <paramref name="folderPath"/> and writes per-file and overall summaries to the console.
/// </summary>
/// <param name="folderPath">Folder to scan; sub-folders are not searched.</param>
/// <returns>
/// One summary per CV file. A file whose processing throws is still represented,
/// with the exception message stored in its <c>Error</c> property.
/// </returns>
/// <exception cref="DirectoryNotFoundException"><paramref name="folderPath"/> does not exist.</exception>
public async Task<List<CVVerificationSummary>> ProcessFolderAsync(string folderPath)
{
    if (!Directory.Exists(folderPath))
    {
        throw new DirectoryNotFoundException($"Folder not found: {folderPath}");
    }

    string[] cvExtensions = [".pdf", ".docx", ".doc"];

    var candidateFiles = Directory
        .GetFiles(folderPath, "*.*", SearchOption.TopDirectoryOnly)
        .Where(path => cvExtensions.Any(ext => path.EndsWith(ext, StringComparison.OrdinalIgnoreCase)))
        .ToList();

    var heavyRule = new string('=', 80);
    var lightRule = new string('-', 60);

    Console.WriteLine($"Found {candidateFiles.Count} CV files in {folderPath}");
    Console.WriteLine(heavyRule);

    var summaries = new List<CVVerificationSummary>();

    foreach (var path in candidateFiles)
    {
        Console.WriteLine($"\nProcessing: {Path.GetFileName(path)}");
        Console.WriteLine(lightRule);

        try
        {
            var processed = await ProcessSingleCVAsync(path);
            summaries.Add(processed);
            PrintSummary(processed);
        }
        catch (Exception ex)
        {
            // Best effort: one bad CV must not abort the batch, so record the failure and move on.
            Console.WriteLine($"ERROR: {ex.Message}");

            var failure = new CVVerificationSummary
            {
                FileName = Path.GetFileName(path),
                Error = ex.Message
            };
            summaries.Add(failure);
        }
    }

    // Print overall summary
    Console.WriteLine("\n" + heavyRule);
    Console.WriteLine("OVERALL SUMMARY");
    Console.WriteLine(heavyRule);
    PrintOverallSummary(summaries);

    return summaries;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Parses one CV file and verifies each claimed employer and education entry.
/// </summary>
/// <param name="filePath">Path of the CV file to open and parse.</param>
/// <returns>
/// A summary holding the candidate name plus one result per employer and per
/// education entry. An employer whose verification throws is recorded as
/// unverified with the exception message in its notes.
/// </returns>
private async Task<CVVerificationSummary> ProcessSingleCVAsync(string filePath)
{
    // Resolve scoped services from a scope that lives for this one CV.
    using var scope = _serviceProvider.CreateScope();
    var provider = scope.ServiceProvider;

    var cvParser = provider.GetRequiredService<ICVParserService>();
    var companyVerifier = provider.GetRequiredService<ICompanyVerifierService>();
    var educationVerifier = provider.GetRequiredService<IEducationVerifierService>();

    var fileName = Path.GetFileName(filePath);

    // Parse the CV
    await using var cvStream = File.OpenRead(filePath);
    var parsedCV = await cvParser.ParseAsync(cvStream, fileName);

    var summary = new CVVerificationSummary
    {
        FileName = fileName,
        CandidateName = parsedCV.FullName ?? "Unknown"
    };

    // Verify employers
    if (parsedCV.Employment is { Count: > 0 } jobs)
    {
        foreach (var job in jobs)
        {
            EmployerVerificationSummary employerSummary;

            try
            {
                var verification = await companyVerifier.VerifyCompanyAsync(
                    job.CompanyName,
                    job.StartDate,
                    job.EndDate,
                    job.JobTitle);

                employerSummary = new EmployerVerificationSummary
                {
                    ClaimedName = job.CompanyName,
                    MatchedName = verification.MatchedCompanyName,
                    CompanyNumber = verification.MatchedCompanyNumber,
                    IsVerified = verification.IsVerified,
                    MatchScore = verification.MatchScore,
                    Notes = verification.VerificationNotes,
                    Status = verification.CompanyStatus
                };
            }
            catch (Exception ex)
            {
                // Keep going: a failed lookup is reported as an unverified employer.
                employerSummary = new EmployerVerificationSummary
                {
                    ClaimedName = job.CompanyName,
                    IsVerified = false,
                    Notes = $"Error: {ex.Message}"
                };
            }

            summary.EmployerResults.Add(employerSummary);
        }
    }

    // Verify education
    if (parsedCV.Education is { Count: > 0 } qualifications)
    {
        var educationEntries = qualifications
            .Select(q => new EducationEntry
            {
                Institution = q.Institution,
                Qualification = q.Qualification,
                Subject = q.Subject,
                StartDate = q.StartDate,
                EndDate = q.EndDate
            })
            .ToList();

        foreach (var verified in educationVerifier.VerifyAll(educationEntries))
        {
            summary.EducationResults.Add(new EducationVerificationSummary
            {
                ClaimedInstitution = verified.ClaimedInstitution,
                MatchedInstitution = verified.MatchedInstitution,
                Qualification = verified.ClaimedQualification,
                IsVerified = verified.IsVerified,
                Status = verified.Status,
                Notes = verified.VerificationNotes
            });
        }
    }

    return summary;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Writes the per-CV verification results (employers first, then education) to the console.
/// </summary>
/// <param name="summary">The results for one CV.</param>
private static void PrintSummary(CVVerificationSummary summary)
{
    Console.WriteLine($"Candidate: {summary.CandidateName}");

    Console.WriteLine($"\n EMPLOYERS ({summary.EmployerResults.Count}):");
    foreach (var employer in summary.EmployerResults)
    {
        string marker;
        string detail;
        if (employer.IsVerified)
        {
            marker = "✓";
            detail = $"-> {employer.MatchedName} ({employer.MatchScore}%)";
        }
        else
        {
            marker = "✗";
            // Fall back to a fixed text when the verifier left no notes.
            detail = employer.Notes ?? "Not found";
        }

        Console.WriteLine($" {marker} {employer.ClaimedName}");
        Console.WriteLine($" {detail}");
    }

    Console.WriteLine($"\n EDUCATION ({summary.EducationResults.Count}):");
    foreach (var education in summary.EducationResults)
    {
        var marker = education.IsVerified ? "✓" : "✗";

        // The matched institution is shown only for verified entries that actually have one;
        // otherwise the notes are shown, or the status when there are no notes.
        var hasMatch = education.IsVerified && education.MatchedInstitution != null;
        var detail = hasMatch
            ? $"-> {education.MatchedInstitution}"
            : education.Notes ?? education.Status;

        Console.WriteLine($" {marker} {education.ClaimedInstitution}");
        Console.WriteLine($" {education.Qualification}");
        Console.WriteLine($" {detail}");
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Writes aggregate statistics for a batch of CV results to the console, followed by the
/// most frequently unverified employers and institutions (at most 20 of each).
/// </summary>
/// <param name="results">The summaries of every CV in the batch.</param>
private static void PrintOverallSummary(List<CVVerificationSummary> results)
{
    // Integer percentage; reports 0 when there is nothing to divide by.
    static int Percent(int verified, int total) => total > 0 ? verified * 100 / total : 0;

    var successfulCVs = results.Count(r => r.Error == null);

    var totalEmployers = results.Sum(r => r.EmployerResults.Count);
    var verifiedEmployers = results.Sum(r => r.EmployerResults.Count(e => e.IsVerified));

    var totalEducation = results.Sum(r => r.EducationResults.Count);
    var verifiedEducation = results.Sum(r => r.EducationResults.Count(e => e.IsVerified));

    Console.WriteLine($"CVs Processed: {successfulCVs}/{results.Count}");
    Console.WriteLine($"Employers: {verifiedEmployers}/{totalEmployers} verified ({Percent(verifiedEmployers, totalEmployers)}%)");
    Console.WriteLine($"Education: {verifiedEducation}/{totalEducation} verified ({Percent(verifiedEducation, totalEducation)}%)");

    // Unverified employers, grouped by the claimed name, most frequent first.
    var employerGroups = results
        .SelectMany(r => r.EmployerResults)
        .Where(e => !e.IsVerified)
        .GroupBy(e => e.ClaimedName)
        .OrderByDescending(g => g.Count())
        .ToList();

    if (employerGroups.Count > 0)
    {
        Console.WriteLine($"\nUNVERIFIED EMPLOYERS ({employerGroups.Count} unique):");
        foreach (var employerGroup in employerGroups.Take(20))
        {
            Console.WriteLine($" - {employerGroup.Key} (x{employerGroup.Count()})");
        }
    }

    // Unverified institutions, grouped by the claimed institution, most frequent first.
    var institutionGroups = results
        .SelectMany(r => r.EducationResults)
        .Where(e => !e.IsVerified)
        .GroupBy(e => e.ClaimedInstitution)
        .OrderByDescending(g => g.Count())
        .ToList();

    if (institutionGroups.Count > 0)
    {
        Console.WriteLine($"\nUNVERIFIED INSTITUTIONS ({institutionGroups.Count} unique):");
        foreach (var institutionGroup in institutionGroups.Take(20))
        {
            Console.WriteLine($" - {institutionGroup.Key} (x{institutionGroup.Count()})");
        }
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Serialises the batch results as indented JSON, writes them to <paramref name="outputPath"/>
/// and reports the destination on the console.
/// </summary>
/// <param name="results">The summaries to export.</param>
/// <param name="outputPath">File that receives the JSON text.</param>
public static void ExportToJson(List<CVVerificationSummary> results, string outputPath)
{
    var options = new JsonSerializerOptions();
    options.WriteIndented = true;

    File.WriteAllText(outputPath, JsonSerializer.Serialize(results, options));

    Console.WriteLine($"\nResults exported to: {outputPath}");
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Verification outcome for one CV file: identifying details plus the per-employer and
/// per-education results.
/// </summary>
public class CVVerificationSummary
{
    /// <summary>Name of the CV file; empty until set.</summary>
    public string FileName { get; set; } = string.Empty;

    /// <summary>Candidate name; empty until set.</summary>
    public string CandidateName { get; set; } = string.Empty;

    /// <summary>Error text for this CV; null when no error has been recorded.</summary>
    public string? Error { get; set; }

    /// <summary>One entry per employer that was checked; starts empty.</summary>
    public List<EmployerVerificationSummary> EmployerResults { get; set; } = new List<EmployerVerificationSummary>();

    /// <summary>One entry per education record that was checked; starts empty.</summary>
    public List<EducationVerificationSummary> EducationResults { get; set; } = new List<EducationVerificationSummary>();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Verification outcome for a single employer claimed on a CV.
/// </summary>
public class EmployerVerificationSummary
{
    /// <summary>Employer name as written on the CV; empty until set.</summary>
    public string ClaimedName { get; set; } = string.Empty;

    /// <summary>Name of the matched company, if any.</summary>
    public string? MatchedName { get; set; }

    /// <summary>Number of the matched company, if any.</summary>
    public string? CompanyNumber { get; set; }

    /// <summary>Whether the employer was verified.</summary>
    public bool IsVerified { get; set; }

    /// <summary>Match score reported by the verifier.</summary>
    public int MatchScore { get; set; }

    /// <summary>Free-text notes, including error text when verification failed.</summary>
    public string? Notes { get; set; }

    /// <summary>Status of the matched company, if reported.</summary>
    public string? Status { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Verification outcome for a single education entry claimed on a CV.
/// </summary>
public class EducationVerificationSummary
{
    /// <summary>Institution name as written on the CV; empty until set.</summary>
    public string ClaimedInstitution { get; set; } = string.Empty;

    /// <summary>Name of the matched institution, if any.</summary>
    public string? MatchedInstitution { get; set; }

    /// <summary>Qualification claimed for this entry.</summary>
    public string? Qualification { get; set; }

    /// <summary>Whether the entry was verified.</summary>
    public bool IsVerified { get; set; }

    /// <summary>Status text reported by the verifier.</summary>
    public string? Status { get; set; }

    /// <summary>Free-text notes from the verifier.</summary>
    public string? Notes { get; set; }
}
|
||||||
@@ -76,8 +76,9 @@ public class CompanyVerifierServiceTests : IDisposable
|
|||||||
_mockAiMatcher.Setup(m => m.FindBestMatchAsync(
|
_mockAiMatcher.Setup(m => m.FindBestMatchAsync(
|
||||||
It.IsAny<string>(),
|
It.IsAny<string>(),
|
||||||
It.IsAny<List<CompanyCandidate>>(),
|
It.IsAny<List<CompanyCandidate>>(),
|
||||||
|
It.IsAny<string?>(),
|
||||||
It.IsAny<CancellationToken>()))
|
It.IsAny<CancellationToken>()))
|
||||||
.Returns((string cvCompanyName, List<CompanyCandidate> candidates, CancellationToken _) =>
|
.Returns((string cvCompanyName, List<CompanyCandidate> candidates, string? industryHint, CancellationToken _) =>
|
||||||
{
|
{
|
||||||
// Find exact or close match in candidates
|
// Find exact or close match in candidates
|
||||||
var exactMatch = candidates.FirstOrDefault(c =>
|
var exactMatch = candidates.FirstOrDefault(c =>
|
||||||
|
|||||||
179
tests/RealCV.Tests/Services/CompoundNameDetectionTests.cs
Normal file
179
tests/RealCV.Tests/Services/CompoundNameDetectionTests.cs
Normal file
@@ -0,0 +1,179 @@
|
|||||||
|
using FluentAssertions;
|
||||||
|
using Microsoft.Extensions.Logging.Abstractions;
|
||||||
|
using Microsoft.Extensions.Options;
|
||||||
|
using RealCV.Infrastructure.Configuration;
|
||||||
|
using RealCV.Infrastructure.Services;
|
||||||
|
|
||||||
|
namespace RealCV.Tests.Services;
|
||||||
|
|
||||||
|
/// <summary>
/// Exercises <c>ExtractCompanyNamesAsync</c> on the company-name matcher: inputs that must be
/// left alone yield null, inputs that name several companies yield the individual parts.
/// </summary>
public sealed class CompoundNameDetectionTests
{
    private readonly AICompanyNameMatcherService _sut;

    public CompoundNameDetectionTests()
    {
        // A placeholder API key is enough to construct the service under test.
        var anthropicOptions = Options.Create(new AnthropicSettings { ApiKey = "test-key" });
        var logger = NullLogger<AICompanyNameMatcherService>.Instance;
        _sut = new AICompanyNameMatcherService(anthropicOptions, logger);
    }

    #region Known Single Companies (should NOT be split)

    [Theory]
    [InlineData("Ernst & Young")]
    [InlineData("Ernst and Young")]
    [InlineData("Marks & Spencer")]
    [InlineData("Marks and Spencer")]
    [InlineData("Procter & Gamble")]
    [InlineData("Johnson & Johnson")]
    [InlineData("Deloitte and Touche")]
    [InlineData("Allen & Overy")]
    [InlineData("Slaughter and May")]
    [InlineData("Holland & Barrett")]
    [InlineData("Smith & Nephew")]
    [InlineData("AT&T")]
    [InlineData("M&S")]
    public async Task ExtractCompanyNamesAsync_KnownSingleCompany_ReturnsNull(string companyName)
    {
        var extracted = await _sut.ExtractCompanyNamesAsync(companyName);

        extracted.Should().BeNull($"'{companyName}' is a known single company and should not be split");
    }

    [Theory]
    [InlineData("Ernst & Young LLP")]
    [InlineData("Marks & Spencer PLC")]
    [InlineData("Procter & Gamble UK")]
    [InlineData("Johnson & Johnson Medical")]
    public async Task ExtractCompanyNamesAsync_KnownSingleCompanyWithSuffix_ReturnsNull(string companyName)
    {
        var extracted = await _sut.ExtractCompanyNamesAsync(companyName);

        extracted.Should().BeNull($"'{companyName}' contains a known single company and should not be split");
    }

    #endregion

    #region Department/Division Patterns (should NOT be split)

    [Theory]
    [InlineData("Tesco Stores and Distribution")]
    [InlineData("BMW UK and Ireland")]
    [InlineData("Google Europe and Middle East")]
    [InlineData("Sales and Marketing")]
    [InlineData("Research and Development")]
    [InlineData("Finance and Operations")]
    public async Task ExtractCompanyNamesAsync_DepartmentPattern_ReturnsNull(string companyName)
    {
        var extracted = await _sut.ExtractCompanyNamesAsync(companyName);

        extracted.Should().BeNull($"'{companyName}' looks like departments/divisions and should not be split");
    }

    #endregion

    #region Compound Names with Slash (SHOULD be split)

    [Theory]
    [InlineData("ASDA/WALMART", new[] { "ASDA", "WALMART" })]
    [InlineData("BBC/ITV", new[] { "BBC", "ITV" })]
    [InlineData("Tesco/Sainsbury's", new[] { "Tesco", "Sainsbury's" })]
    [InlineData("Microsoft/Google", new[] { "Microsoft", "Google" })]
    public async Task ExtractCompanyNamesAsync_SlashSeparated_ReturnsParts(string companyName, string[] expectedParts)
    {
        var extracted = await _sut.ExtractCompanyNamesAsync(companyName);

        extracted.Should().NotBeNull($"'{companyName}' contains '/' and should be split");
        extracted.Should().BeEquivalentTo(expectedParts);
    }

    #endregion

    #region Compound Names with And/Ampersand

    [Theory]
    [InlineData("Acme Ltd & Beta Ltd", new[] { "Acme Ltd", "Beta Ltd" })]
    public async Task ExtractCompanyNamesAsync_BothPartsHaveCompanySuffix_ReturnsParts(string companyName, string[] expectedParts)
    {
        // Both halves carry an explicit company suffix (Ltd, PLC, etc.), so a split is expected.
        var extracted = await _sut.ExtractCompanyNamesAsync(companyName);

        extracted.Should().NotBeNull($"'{companyName}' has company suffixes on both parts");
        extracted.Should().BeEquivalentTo(expectedParts);
    }

    [Theory]
    [InlineData("Corus & Laura Ashley Hotels")] // Ambiguous - neither has company suffix
    [InlineData("Smith & Jones Consulting")] // Could be a single partnership
    [InlineData("Acme PLC and Beta PLC")] // Matches " plc and " department pattern
    public async Task ExtractCompanyNamesAsync_AmbiguousWithAnd_ReturnsNull(string companyName)
    {
        // The rule-based detection is expected to stay conservative for ambiguous "&" / "and" inputs.
        var extracted = await _sut.ExtractCompanyNamesAsync(companyName);

        extracted.Should().BeNull($"'{companyName}' is ambiguous and should not be split");
    }

    #endregion

    #region Edge Cases

    [Theory]
    [InlineData("")]
    [InlineData(" ")]
    [InlineData(null)]
    public async Task ExtractCompanyNamesAsync_EmptyOrNull_ReturnsNull(string? companyName)
    {
        var extracted = await _sut.ExtractCompanyNamesAsync(companyName!);

        extracted.Should().BeNull();
    }

    [Theory]
    [InlineData("Microsoft")]
    [InlineData("Google")]
    [InlineData("Amazon")]
    [InlineData("Apple Inc")]
    [InlineData("Tesco PLC")]
    public async Task ExtractCompanyNamesAsync_SimpleCompanyName_ReturnsNull(string companyName)
    {
        var extracted = await _sut.ExtractCompanyNamesAsync(companyName);

        extracted.Should().BeNull($"'{companyName}' is a simple company name and should not be split");
    }

    [Fact]
    public async Task ExtractCompanyNamesAsync_ShortParts_ReturnsNull()
    {
        // Single-letter parts are too short to be valid company names.
        const string shortCompound = "A & B";

        var extracted = await _sut.ExtractCompanyNamesAsync(shortCompound);

        extracted.Should().BeNull("parts are too short to be valid company names");
    }

    #endregion
}
|
||||||
@@ -51,7 +51,7 @@ public sealed class EducationVerifierServiceTests
|
|||||||
var result = _sut.Verify(education);
|
var result = _sut.Verify(education);
|
||||||
|
|
||||||
// Assert
|
// Assert
|
||||||
result.VerificationNotes.Should().Contain("diploma mill blacklist");
|
result.VerificationNotes.Should().Contain("not found in accredited institutions");
|
||||||
}
|
}
|
||||||
|
|
||||||
#endregion
|
#endregion
|
||||||
|
|||||||
15
tools/CVBatchTester/CVBatchTester.csproj
Normal file
15
tools/CVBatchTester/CVBatchTester.csproj
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<OutputType>Exe</OutputType>
|
||||||
|
<TargetFramework>net8.0</TargetFramework>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<ProjectReference Include="../../src/RealCV.Application/RealCV.Application.csproj" />
|
||||||
|
<ProjectReference Include="../../src/RealCV.Infrastructure/RealCV.Infrastructure.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
445
tools/CVBatchTester/Program.cs
Normal file
445
tools/CVBatchTester/Program.cs
Normal file
@@ -0,0 +1,445 @@
|
|||||||
|
using System.Text.Json;
|
||||||
|
using System.Text.Json.Serialization;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
using Microsoft.Extensions.Configuration;
|
||||||
|
using Microsoft.Extensions.DependencyInjection;
|
||||||
|
using Microsoft.Extensions.Logging;
|
||||||
|
using RealCV.Application.Interfaces;
|
||||||
|
using RealCV.Application.Models;
|
||||||
|
using RealCV.Infrastructure.Configuration;
|
||||||
|
using RealCV.Infrastructure.Data;
|
||||||
|
using RealCV.Infrastructure.ExternalApis;
|
||||||
|
using RealCV.Infrastructure.Services;
|
||||||
|
|
||||||
|
namespace CVBatchTester;
|
||||||
|
|
||||||
|
// DTOs for the test JSON format (snake_case with a nested personal object).

/// <summary>
/// Root object of a JSON test CV. Every member is optional and stays null when the
/// corresponding JSON property is absent.
/// </summary>
internal record TestCVData
{
    /// <summary>Identifier of the test CV.</summary>
    public string? CvId { get; init; }

    /// <summary>Category label of the test CV.</summary>
    public string? Category { get; init; }

    /// <summary>Flags listed in the test data as expected for this CV.</summary>
    public List<string>? ExpectedFlags { get; init; }

    /// <summary>Nested personal details (name, contact information).</summary>
    public TestPersonalData? Personal { get; init; }

    /// <summary>Free-text profile section.</summary>
    public string? Profile { get; init; }

    /// <summary>Employment history entries.</summary>
    public List<TestEmploymentEntry>? Employment { get; init; }

    /// <summary>Education history entries.</summary>
    public List<TestEducationEntry>? Education { get; init; }

    /// <summary>Skills listed on the CV.</summary>
    public List<string>? Skills { get; init; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Personal details section of a JSON test CV; all members are optional.
/// </summary>
internal record TestPersonalData
{
    /// <summary>Candidate name.</summary>
    public string? Name { get; init; }

    /// <summary>Email address.</summary>
    public string? Email { get; init; }

    /// <summary>Phone number.</summary>
    public string? Phone { get; init; }

    /// <summary>Postal address.</summary>
    public string? Address { get; init; }

    // NOTE(review): under a snake_case naming policy this would map to "linked_in";
    // confirm that is the key the test JSON actually uses.
    /// <summary>LinkedIn profile reference.</summary>
    public string? LinkedIn { get; init; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// One employment entry of a JSON test CV; all members are optional. Dates are kept as the
/// raw strings found in the JSON.
/// </summary>
internal record TestEmploymentEntry
{
    /// <summary>Employer name.</summary>
    public string? Company { get; init; }

    /// <summary>Job title held.</summary>
    public string? JobTitle { get; init; }

    /// <summary>Start date as text.</summary>
    public string? StartDate { get; init; }

    /// <summary>End date as text; may be absent.</summary>
    public string? EndDate { get; init; }

    /// <summary>Work location.</summary>
    public string? Location { get; init; }

    /// <summary>Role description.</summary>
    public string? Description { get; init; }

    /// <summary>Achievements listed for the role.</summary>
    public List<string>? Achievements { get; init; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// One education entry of a JSON test CV; all members are optional. Dates are kept as the
/// raw strings found in the JSON.
/// </summary>
internal record TestEducationEntry
{
    /// <summary>Institution name.</summary>
    public string? Institution { get; init; }

    /// <summary>Qualification obtained.</summary>
    public string? Qualification { get; init; }

    /// <summary>Subject studied.</summary>
    public string? Subject { get; init; }

    /// <summary>Classification or grade.</summary>
    public string? Classification { get; init; }

    /// <summary>Start date as text.</summary>
    public string? StartDate { get; init; }

    /// <summary>End date as text.</summary>
    public string? EndDate { get; init; }
}
|
||||||
|
|
||||||
|
class Program
|
||||||
|
{
|
||||||
|
// Optional log-file sink; while it is null, Log writes to the console only.
private static StreamWriter? _logWriter;

// Options used when reading JSON test CVs: case-insensitive property matching,
// snake_case property names, and enums accepted as strings.
private static readonly JsonSerializerOptions JsonOptions = new JsonSerializerOptions
{
    Converters = { new JsonStringEnumConverter() },
    PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
    PropertyNameCaseInsensitive = true
};
|
||||||
|
|
||||||
|
/// <summary>
/// Entry point: verifies every CV file (.pdf, .docx, .doc, .json) found directly in a folder
/// and writes a per-CV report plus an overall summary to the console and to a log file.
/// </summary>
/// <param name="args">
/// The first argument is the folder to scan (the user is prompted when there are no arguments).
/// An optional <c>--output &lt;file&gt;</c> pair selects the log file; otherwise a timestamped
/// file is created inside the scanned folder.
/// </param>
/// <returns>0 when the batch ran; 1 when the folder is missing or contains no CV files.</returns>
static async Task<int> Main(string[] args)
{
    var folderPath = args.FirstOrDefault() ?? AskForFolder();

    if (string.IsNullOrEmpty(folderPath) || !Directory.Exists(folderPath))
    {
        Log($"Error: Folder not found: {folderPath}");
        Log("Usage: CVBatchTester <folder-path> [--output <file>]");
        Log(" e.g. CVBatchTester /home/user/cvs");
        Log(" e.g. CVBatchTester /home/user/cvs --output /tmp/results.log");
        return 1;
    }

    // Check for --output flag (it must be followed by a value to take effect).
    var outputIndex = Array.IndexOf(args, "--output");
    var logPath = outputIndex >= 0 && outputIndex < args.Length - 1
        ? args[outputIndex + 1]
        : Path.Combine(folderPath, $"batch-results-{DateTime.Now:yyyyMMdd-HHmmss}.log");

    _logWriter = new StreamWriter(logPath, false) { AutoFlush = true };

    try
    {
        Log($"CV Batch Verification Tester");
        Log($"Processing CVs from: {folderPath}");
        Log($"Output log: {logPath}");
        Log($"Started: {DateTime.Now:yyyy-MM-dd HH:mm:ss}");
        Log(new string('=', 80));

        // Setup DI. The provider is disposed when this block exits so that the services
        // and logging providers it owns are released.
        var services = new ServiceCollection();
        ConfigureServices(services);
        await using var provider = services.BuildServiceProvider();

        // Find CV files (top-level only, ordered by path).
        var cvFiles = Directory.GetFiles(folderPath, "*.*", SearchOption.TopDirectoryOnly)
            .Where(f => f.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase) ||
                        f.EndsWith(".docx", StringComparison.OrdinalIgnoreCase) ||
                        f.EndsWith(".doc", StringComparison.OrdinalIgnoreCase) ||
                        f.EndsWith(".json", StringComparison.OrdinalIgnoreCase))
            .OrderBy(f => f)
            .ToList();

        Log($"Found {cvFiles.Count} CV files\n");

        if (cvFiles.Count == 0)
        {
            Log("No CV files found (.pdf, .docx, .doc, .json)");
            return 1;
        }

        // Track results
        var allUnverifiedEmployers = new List<string>();
        var allUnverifiedInstitutions = new List<string>();
        var totalEmployers = 0;
        var verifiedEmployers = 0;
        var totalEducation = 0;
        var verifiedEducation = 0;
        var processedCount = 0;
        var errorCount = 0;

        foreach (var cvFile in cvFiles)
        {
            Log($"\n{new string('=', 80)}");
            Log($"[{++processedCount}/{cvFiles.Count}] {Path.GetFileName(cvFile)}");
            Log(new string('=', 80));

            try
            {
                using var scope = provider.CreateScope();
                var parser = scope.ServiceProvider.GetRequiredService<ICVParserService>();
                var companyVerifier = scope.ServiceProvider.GetRequiredService<ICompanyVerifierService>();
                var eduVerifier = scope.ServiceProvider.GetRequiredService<IEducationVerifierService>();

                // Parse CV - JSON files are deserialized directly instead of going through the parser.
                CVData cv;
                if (cvFile.EndsWith(".json", StringComparison.OrdinalIgnoreCase))
                {
                    var jsonContent = await File.ReadAllTextAsync(cvFile);
                    var testCv = JsonSerializer.Deserialize<TestCVData>(jsonContent, JsonOptions)
                        ?? throw new InvalidOperationException("Failed to deserialize JSON CV");

                    // Convert TestCVData to CVData
                    cv = ConvertTestCVData(testCv);
                    Log($"Loaded JSON CV: {cv.FullName}");
                }
                else
                {
                    await using var stream = File.OpenRead(cvFile);
                    cv = await parser.ParseAsync(stream, Path.GetFileName(cvFile));
                    Log($"Parsed CV: {cv.FullName}");
                }

                // Verify Employers
                if (cv.Employment?.Count > 0)
                {
                    Log($"\nEMPLOYERS ({cv.Employment.Count}):");
                    Log(new string('-', 60));

                    foreach (var emp in cv.Employment)
                    {
                        totalEmployers++;
                        try
                        {
                            var result = await companyVerifier.VerifyCompanyAsync(
                                emp.CompanyName,
                                emp.StartDate,
                                emp.EndDate,
                                emp.JobTitle);

                            var icon = result.IsVerified ? "✓" : "✗";
                            var period = FormatPeriod(emp.StartDate, emp.EndDate);

                            Log($"\n {icon} {emp.CompanyName}");
                            Log($" Period: {period}");
                            Log($" Role: {emp.JobTitle}");

                            if (result.IsVerified)
                            {
                                verifiedEmployers++;
                                Log($" Match: {result.MatchedCompanyName} ({result.MatchScore}%)");
                                if (!string.IsNullOrEmpty(result.MatchedCompanyNumber))
                                {
                                    Log($" Company #: {result.MatchedCompanyNumber}");
                                }
                                if (!string.IsNullOrEmpty(result.CompanyStatus))
                                {
                                    Log($" Status: {result.CompanyStatus}");
                                }
                            }
                            else
                            {
                                allUnverifiedEmployers.Add(emp.CompanyName);
                            }

                            if (!string.IsNullOrEmpty(result.VerificationNotes))
                            {
                                Log($" Note: {result.VerificationNotes}");
                            }
                        }
                        catch (Exception ex)
                        {
                            // A failed lookup counts as unverified; the batch carries on.
                            Log($"\n ✗ {emp.CompanyName}");
                            Log($" ERROR: {ex.Message}");
                            allUnverifiedEmployers.Add(emp.CompanyName);
                        }
                    }
                }

                // Verify Education
                if (cv.Education?.Count > 0)
                {
                    Log($"\nEDUCATION ({cv.Education.Count}):");
                    Log(new string('-', 60));

                    var eduEntries = cv.Education.Select(e => new EducationEntry
                    {
                        Institution = e.Institution,
                        Qualification = e.Qualification,
                        Subject = e.Subject,
                        StartDate = e.StartDate,
                        EndDate = e.EndDate
                    }).ToList();

                    var eduResults = eduVerifier.VerifyAll(eduEntries);

                    foreach (var result in eduResults)
                    {
                        totalEducation++;
                        var icon = result.IsVerified ? "✓" : "✗";

                        Log($"\n {icon} {result.ClaimedInstitution}");
                        Log($" Qualification: {result.ClaimedQualification}");
                        if (!string.IsNullOrEmpty(result.ClaimedSubject))
                        {
                            Log($" Subject: {result.ClaimedSubject}");
                        }

                        if (result.IsVerified)
                        {
                            verifiedEducation++;
                            // Only show the match when it differs from what the CV claimed.
                            if (result.MatchedInstitution != null &&
                                !result.MatchedInstitution.Equals(result.ClaimedInstitution, StringComparison.OrdinalIgnoreCase))
                            {
                                Log($" Match: {result.MatchedInstitution}");
                            }
                        }
                        else
                        {
                            allUnverifiedInstitutions.Add(result.ClaimedInstitution ?? "Unknown");
                            Log($" Status: {result.Status}");
                        }

                        if (!string.IsNullOrEmpty(result.VerificationNotes))
                        {
                            Log($" Note: {result.VerificationNotes}");
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                errorCount++;
                Log($"ERROR processing file: {ex.Message}");
            }
        }

        // Print Summary
        Log($"\n\n{new string('=', 80)}");
        Log("VERIFICATION SUMMARY");
        Log(new string('=', 80));

        Log($"\nCVs Processed: {processedCount - errorCount}/{cvFiles.Count}");
        if (errorCount > 0)
        {
            Log($"Errors: {errorCount}");
        }

        var empRate = totalEmployers > 0 ? verifiedEmployers * 100 / totalEmployers : 0;
        var eduRate = totalEducation > 0 ? verifiedEducation * 100 / totalEducation : 0;

        Log($"\nEmployers: {verifiedEmployers}/{totalEmployers} verified ({empRate}%)");
        Log($"Education: {verifiedEducation}/{totalEducation} verified ({eduRate}%)");

        // List unverified employers (case-insensitive grouping, most frequent first).
        var uniqueUnverifiedEmployers = allUnverifiedEmployers
            .GroupBy(e => e, StringComparer.OrdinalIgnoreCase)
            .OrderByDescending(g => g.Count())
            .ThenBy(g => g.Key)
            .ToList();

        if (uniqueUnverifiedEmployers.Count > 0)
        {
            Log($"\n{new string('-', 60)}");
            Log($"UNVERIFIED EMPLOYERS ({uniqueUnverifiedEmployers.Count} unique):");
            foreach (var group in uniqueUnverifiedEmployers)
            {
                var count = group.Count() > 1 ? $" (x{group.Count()})" : "";
                Log($" - {group.Key}{count}");
            }
        }

        // List unverified institutions (case-insensitive grouping, most frequent first).
        var uniqueUnverifiedInstitutions = allUnverifiedInstitutions
            .GroupBy(i => i, StringComparer.OrdinalIgnoreCase)
            .OrderByDescending(g => g.Count())
            .ThenBy(g => g.Key)
            .ToList();

        if (uniqueUnverifiedInstitutions.Count > 0)
        {
            Log($"\n{new string('-', 60)}");
            Log($"UNVERIFIED INSTITUTIONS ({uniqueUnverifiedInstitutions.Count} unique):");
            foreach (var group in uniqueUnverifiedInstitutions)
            {
                var count = group.Count() > 1 ? $" (x{group.Count()})" : "";
                Log($" - {group.Key}{count}");
            }
        }

        Log($"\nCompleted: {DateTime.Now:yyyy-MM-dd HH:mm:ss}");
        Log($"\n{new string('=', 80)}");
    }
    finally
    {
        // Close the log file on every exit path (normal completion, the "no files" early
        // return, or an unexpected exception) and clear the field so a later Log call
        // cannot write to a closed writer.
        _logWriter?.Dispose();
        _logWriter = null;
    }

    Console.WriteLine($"\nResults written to: {logPath}");

    return 0;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Writes a line to the console and, when a log file is open, to that file as well.
/// </summary>
/// <param name="message">The text to write.</param>
static void Log(string message)
{
    Console.WriteLine(message);

    if (_logWriter != null)
    {
        _logWriter.WriteLine(message);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Prompts on the console for the CV folder path.
/// </summary>
/// <returns>The line that was entered, or an empty string when no line could be read.</returns>
static string AskForFolder()
{
    Console.Write("Enter CV folder path: ");

    var answer = Console.ReadLine();
    return answer is null ? "" : answer;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Formats an employment period as "start - end" using the "MMM yyyy" date format.
/// </summary>
/// <param name="start">Start date; rendered as "?" when null.</param>
/// <param name="end">End date; rendered as "Present" when null.</param>
/// <returns>The formatted period text.</returns>
static string FormatPeriod(DateOnly? start, DateOnly? end)
{
    var from = start.HasValue ? start.Value.ToString("MMM yyyy") : "?";
    var until = end.HasValue ? end.Value.ToString("MMM yyyy") : "Present";

    return from + " - " + until;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Maps a deserialized JSON test CV onto the <c>CVData</c> model used by the verifiers.
/// Missing names, companies, job titles and institutions become "Unknown"; missing lists
/// become empty; date strings are converted with <c>ParseDate</c>.
/// </summary>
/// <param name="testCv">The deserialized test CV.</param>
/// <returns>The populated <c>CVData</c> instance.</returns>
static CVData ConvertTestCVData(TestCVData testCv)
{
    static EmploymentEntry ToEmployment(TestEmploymentEntry job) => new EmploymentEntry
    {
        CompanyName = job.Company ?? "Unknown",
        JobTitle = job.JobTitle ?? "Unknown",
        Location = job.Location,
        StartDate = ParseDate(job.StartDate),
        EndDate = ParseDate(job.EndDate),
        // A role counts as current only when the JSON carries no end date at all.
        IsCurrent = job.EndDate == null,
        Description = job.Description
    };

    static EducationEntry ToEducation(TestEducationEntry course) => new EducationEntry
    {
        Institution = course.Institution ?? "Unknown",
        Qualification = course.Qualification,
        Subject = course.Subject,
        StartDate = ParseDate(course.StartDate),
        EndDate = ParseDate(course.EndDate)
    };

    var personal = testCv.Personal;

    return new CVData
    {
        FullName = personal?.Name ?? "Unknown",
        Email = personal?.Email,
        Phone = personal?.Phone,
        Employment = testCv.Employment?.Select(ToEmployment).ToList() ?? [],
        Education = testCv.Education?.Select(ToEducation).ToList() ?? [],
        Skills = testCv.Skills ?? []
    };
}
|
||||||
|
|
||||||
|
/// <summary>
/// Converts a date string from a JSON test CV into a <see cref="DateOnly"/>.
/// </summary>
/// <remarks>
/// A seven-character "YYYY-MM" value maps to the first day of that month. Anything else is
/// handed to <see cref="DateOnly.TryParse(string, out DateOnly)"/>. Input that cannot be
/// interpreted, including a "YYYY-MM" value whose year or month is out of range, yields null
/// rather than an exception.
/// </remarks>
/// <param name="dateStr">The raw date text; may be null or empty.</param>
/// <returns>The parsed date, or null when the text is missing or not a valid date.</returns>
static DateOnly? ParseDate(string? dateStr)
{
    if (string.IsNullOrEmpty(dateStr))
    {
        return null;
    }

    // Try parsing YYYY-MM format
    if (dateStr.Length == 7 && dateStr[4] == '-' &&
        int.TryParse(dateStr[..4], out var year) &&
        int.TryParse(dateStr[5..], out var month))
    {
        // DateOnly's constructor throws for a year outside 1-9999 or a month outside 1-12,
        // so values such as "2020-13" or "0000-05" are rejected here instead.
        var inRange = year is >= 1 and <= 9999 && month is >= 1 and <= 12;
        return inRange ? new DateOnly(year, month, 1) : null;
    }

    // Try standard parsing
    return DateOnly.TryParse(dateStr, out var date) ? date : null;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Registers configuration-bound settings, logging, the database context factory, the
/// Companies House HTTP client and the verification services used by the batch tester.
/// </summary>
/// <param name="services">The collection that receives the registrations.</param>
static void ConfigureServices(IServiceCollection services)
{
    // Configuration is read from the first of these directories that exists.
    const string defaultWebProjectPath = "/git/RealCV/src/RealCV.Web";
    string[] candidateDirectories =
    {
        "/var/www/realcv",
        defaultWebProjectPath,
        Path.GetFullPath(Path.Combine(AppContext.BaseDirectory, "..", "..", "..", "..", "..", "src", "RealCV.Web"))
    };

    // NOTE(review): when none of the candidates exist the default path is still used as the
    // base path - confirm how the configuration builder reacts to a missing directory.
    var webProjectPath = candidateDirectories.FirstOrDefault(Directory.Exists) ?? defaultWebProjectPath;
    Log($"Loading config from: {webProjectPath}");

    var configurationBuilder = new ConfigurationBuilder();
    configurationBuilder.SetBasePath(webProjectPath);
    configurationBuilder.AddJsonFile("appsettings.json", optional: true);
    configurationBuilder.AddJsonFile("appsettings.Development.json", optional: true);
    configurationBuilder.AddJsonFile("appsettings.Production.json", optional: true);
    var configuration = configurationBuilder.Build();

    // Logging: information level overall, with the Microsoft and System categories
    // limited to warnings to keep the output readable.
    services.AddLogging(logging =>
    {
        logging.AddConsole();
        logging.SetMinimumLevel(LogLevel.Information);
        logging.AddFilter("Microsoft", LogLevel.Warning);
        logging.AddFilter("System", LogLevel.Warning);
    });

    // Database
    // NOTE(review): the fallback below embeds credentials in source; consider moving it to
    // configuration or an environment variable.
    var configuredConnection = configuration.GetConnectionString("DefaultConnection");
    var connectionString = configuredConnection
        ?? "Server=127.0.0.1;Database=RealCV;User Id=SA;Password=TrueCV_Sql2024!;TrustServerCertificate=True";

    services.AddDbContextFactory<ApplicationDbContext>(dbOptions => dbOptions.UseSqlServer(connectionString));

    // Companies House - settings bound from configuration, client created through the HTTP client factory.
    var companiesHouseSection = configuration.GetSection(CompaniesHouseSettings.SectionName);
    services.Configure<CompaniesHouseSettings>(companiesHouseSection);
    services.AddHttpClient<CompaniesHouseClient>();

    // Anthropic - settings bound from configuration.
    var anthropicSection = configuration.GetSection(AnthropicSettings.SectionName);
    services.Configure<AnthropicSettings>(anthropicSection);
    services.AddScoped<ICompanyNameMatcherService, AICompanyNameMatcherService>();

    // Verification and parsing services
    services.AddScoped<ICompanyVerifierService, CompanyVerifierService>();
    services.AddScoped<IEducationVerifierService, EducationVerifierService>();
    services.AddScoped<ICVParserService, CVParserService>();
}
|
||||||
|
}
|
||||||
195
tools/batch-test-cvs.cs
Normal file
195
tools/batch-test-cvs.cs
Normal file
@@ -0,0 +1,195 @@
|
|||||||
|
#!/usr/bin/env dotnet-script
|
||||||
|
#r "nuget: Microsoft.EntityFrameworkCore.SqlServer, 8.0.0"
|
||||||
|
#r "nuget: Microsoft.Extensions.Configuration.Json, 8.0.0"
|
||||||
|
#r "nuget: Microsoft.Extensions.DependencyInjection, 8.0.0"
|
||||||
|
#r "nuget: Microsoft.Extensions.Logging.Console, 8.0.0"
|
||||||
|
#r "../src/RealCV.Application/bin/Debug/net8.0/RealCV.Application.dll"
|
||||||
|
#r "../src/RealCV.Infrastructure/bin/Debug/net8.0/RealCV.Infrastructure.dll"
|
||||||
|
#r "../src/RealCV.Domain/bin/Debug/net8.0/RealCV.Domain.dll"
|
||||||
|
|
||||||
|
// This is a dotnet-script file. Run with: dotnet script batch-test-cvs.cs -- /path/to/cvs
|
||||||
|
// Install dotnet-script: dotnet tool install -g dotnet-script
|
||||||
|
|
||||||
|
using System;
|
||||||
|
using System.IO;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Text.Json;
|
||||||
|
using Microsoft.EntityFrameworkCore;
|
||||||
|
using Microsoft.Extensions.Configuration;
|
||||||
|
using Microsoft.Extensions.DependencyInjection;
|
||||||
|
using Microsoft.Extensions.Logging;
|
||||||
|
using RealCV.Application.Interfaces;
|
||||||
|
using RealCV.Application.Models;
|
||||||
|
using RealCV.Infrastructure.Data;
|
||||||
|
using RealCV.Infrastructure.Services;
|
||||||
|
using RealCV.Infrastructure.ExternalApis;
|
||||||
|
using RealCV.Infrastructure.Configuration;
|
||||||
|
|
||||||
|
// Folder to scan: first script argument, or a default scratch location when none is given.
var folderPath = Args.FirstOrDefault() ?? "/tmp/test-cvs";

// Bail out early with a usage hint (exit code 1) when the folder is missing.
var folderExists = Directory.Exists(folderPath);
if (folderExists == false)
{
    Console.WriteLine("Error: Folder not found: " + folderPath);
    Console.WriteLine("Usage: dotnet script batch-test-cvs.cs -- /path/to/cvs");
    return 1;
}

// Run banner: the folder being processed followed by an 80-character rule.
Console.WriteLine("Processing CVs from: " + folderPath);
Console.WriteLine(string.Empty.PadRight(80, '='));
|
||||||
|
|
||||||
|
// Build the dependency-injection container used by the rest of the script.
var services = new ServiceCollection();

// Settings are read from the web project's appsettings files, located relative to the
// current working directory. Both files are optional.
var settingsDirectory = Path.Combine(Directory.GetCurrentDirectory(), "../src/RealCV.Web");
var configurationBuilder = new ConfigurationBuilder();
configurationBuilder.SetBasePath(settingsDirectory);
configurationBuilder.AddJsonFile("appsettings.json", optional: true);
configurationBuilder.AddJsonFile("appsettings.Development.json", optional: true);
var configuration = configurationBuilder.Build();

// Console logging, warnings and above only.
services.AddLogging(logging =>
{
    logging.AddConsole();
    logging.SetMinimumLevel(LogLevel.Warning);
});

// Database context factory; the connection string is looked up when the options are built.
services.AddDbContextFactory<ApplicationDbContext>(dbOptions =>
{
    dbOptions.UseSqlServer(configuration.GetConnectionString("DefaultConnection"));
});

// Bind the external API settings sections.
services.Configure<CompaniesHouseSettings>(configuration.GetSection("CompaniesHouse"));
services.Configure<AnthropicSettings>(configuration.GetSection("Anthropic"));

// Typed HTTP clients for the external APIs.
services.AddHttpClient<CompaniesHouseClient>();
services.AddHttpClient<AnthropicClient>();

// Scoped application services resolved per CV inside the processing loop.
services.AddScoped<ICompanyNameMatcherService, CompanyNameMatcherService>();
services.AddScoped<ICompanyVerifierService, CompanyVerifierService>();
services.AddScoped<IEducationVerifierService, EducationVerifierService>();
services.AddScoped<ICVParserService, CVParserService>();

var provider = services.BuildServiceProvider();
|
||||||
|
|
||||||
|
// Collect every .pdf / .docx file (extension match is case-insensitive) directly inside the folder.
var cvFiles = new List<string>();
foreach (var candidatePath in Directory.GetFiles(folderPath, "*.*"))
{
    var isPdf = candidatePath.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase);
    var isWordDocument = candidatePath.EndsWith(".docx", StringComparison.OrdinalIgnoreCase);
    if (isPdf || isWordDocument)
    {
        cvFiles.Add(candidatePath);
    }
}

Console.WriteLine($"Found {cvFiles.Count} CV files\n");

// Names that failed verification, accumulated across all CVs for the final summary.
var allUnverifiedEmployers = new List<string>();
var allUnverifiedInstitutions = new List<string>();

// Running totals across all CVs.
int totalEmployers = 0;
int verifiedEmployers = 0;
int totalEducation = 0;
int verifiedEducation = 0;
|
||||||
|
|
||||||
|
// Parse and verify each CV in turn, printing per-file results and accumulating the
// totals and unverified names that the summary reports afterwards.
foreach (var cvFile in cvFiles)
{
    // 80-character rule. An interpolation alignment such as {'=',-80} only pads a single
    // '=' with spaces, so the rule is built explicitly.
    var fileBanner = new string('=', 80);
    Console.WriteLine($"\n{fileBanner}");
    Console.WriteLine($"FILE: {Path.GetFileName(cvFile)}");
    Console.WriteLine(fileBanner);

    try
    {
        // One DI scope per CV so scoped services are created fresh for each file.
        using var scope = provider.CreateScope();
        var parser = scope.ServiceProvider.GetRequiredService<ICVParserService>();
        var companyVerifier = scope.ServiceProvider.GetRequiredService<ICompanyVerifierService>();
        var eduVerifier = scope.ServiceProvider.GetRequiredService<IEducationVerifierService>();

        await using var stream = File.OpenRead(cvFile);
        var cv = await parser.ParseAsync(stream, Path.GetFileName(cvFile));

        Console.WriteLine($"Candidate: {cv.PersonalInfo?.FullName ?? "Unknown"}");

        // Employers
        if (cv.Employment?.Count > 0)
        {
            Console.WriteLine($"\nEMPLOYERS ({cv.Employment.Count}):");
            foreach (var emp in cv.Employment)
            {
                totalEmployers++;
                var result = await companyVerifier.VerifyCompanyAsync(
                    emp.CompanyName, emp.StartDate, emp.EndDate, emp.JobTitle);

                var icon = result.IsVerified ? "✓" : "✗";
                Console.WriteLine($" {icon} {emp.CompanyName}");

                if (result.IsVerified)
                {
                    verifiedEmployers++;
                    Console.WriteLine($" → {result.MatchedCompanyName} ({result.MatchScore}%)");
                    if (!string.IsNullOrEmpty(result.VerificationNotes))
                    {
                        Console.WriteLine($" Note: {result.VerificationNotes}");
                    }
                }
                else
                {
                    // Remember the claimed name for the summary's unverified list.
                    allUnverifiedEmployers.Add(emp.CompanyName);
                    Console.WriteLine($" Note: {result.VerificationNotes ?? "Not found"}");
                }
            }
        }

        // Education
        if (cv.Education?.Count > 0)
        {
            Console.WriteLine($"\nEDUCATION ({cv.Education.Count}):");

            // Copy the parsed education records into the entry type the verifier accepts.
            var eduEntries = cv.Education.Select(e => new EducationEntry
            {
                Institution = e.Institution,
                Qualification = e.Qualification,
                Subject = e.Subject,
                StartDate = e.StartDate,
                EndDate = e.EndDate
            }).ToList();

            var eduResults = eduVerifier.VerifyAll(eduEntries);
            foreach (var result in eduResults)
            {
                totalEducation++;
                var icon = result.IsVerified ? "✓" : "✗";
                Console.WriteLine($" {icon} {result.ClaimedInstitution}");
                Console.WriteLine($" {result.ClaimedQualification}");

                if (result.IsVerified)
                {
                    verifiedEducation++;
                    // Only show the matched name when it differs from what the CV claimed.
                    if (result.MatchedInstitution != null && result.MatchedInstitution != result.ClaimedInstitution)
                    {
                        Console.WriteLine($" → {result.MatchedInstitution}");
                    }
                }
                else
                {
                    allUnverifiedInstitutions.Add(result.ClaimedInstitution ?? "Unknown");
                    Console.WriteLine($" Status: {result.Status}");
                    if (!string.IsNullOrEmpty(result.VerificationNotes))
                    {
                        Console.WriteLine($" Note: {result.VerificationNotes}");
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: report the failure for this file and carry on with the next CV.
        Console.WriteLine($"ERROR: {ex.Message}");
    }
}
|
||||||
|
|
||||||
|
// Summary
// 80-character rule. An interpolation alignment such as {'=',-80} only pads a single '='
// with spaces, so the rule is built explicitly.
var summaryBanner = new string('=', 80);
Console.WriteLine($"\n\n{summaryBanner}");
Console.WriteLine("SUMMARY");
Console.WriteLine(summaryBanner);
Console.WriteLine($"CVs Processed: {cvFiles.Count}");

// Percentages use integer division and report 0 when nothing was checked, avoiding divide-by-zero.
Console.WriteLine($"Employers: {verifiedEmployers}/{totalEmployers} verified ({(totalEmployers > 0 ? verifiedEmployers * 100 / totalEmployers : 0)}%)");
Console.WriteLine($"Education: {verifiedEducation}/{totalEducation} verified ({(totalEducation > 0 ? verifiedEducation * 100 / totalEducation : 0)}%)");

// De-duplicated, alphabetically sorted employer names that failed verification.
var uniqueUnverifiedEmployers = allUnverifiedEmployers.Distinct().OrderBy(x => x).ToList();
if (uniqueUnverifiedEmployers.Count > 0)
{
    Console.WriteLine($"\nUNVERIFIED EMPLOYERS ({uniqueUnverifiedEmployers.Count}):");
    foreach (var emp in uniqueUnverifiedEmployers)
    {
        Console.WriteLine($" - {emp}");
    }
}

// De-duplicated, alphabetically sorted institution names that failed verification.
var uniqueUnverifiedInstitutions = allUnverifiedInstitutions.Distinct().OrderBy(x => x).ToList();
if (uniqueUnverifiedInstitutions.Count > 0)
{
    Console.WriteLine($"\nUNVERIFIED INSTITUTIONS ({uniqueUnverifiedInstitutions.Count}):");
    foreach (var inst in uniqueUnverifiedInstitutions)
    {
        Console.WriteLine($" - {inst}");
    }
}

return 0;
|
||||||
Reference in New Issue
Block a user