<?php
// Security headers — these must be sent before ANY output (the stray
// timestamp line previously emitted before this block would have caused
// "headers already sent" warnings and disabled every header() call below).
header('X-Content-Type-Options: nosniff');
header('X-Frame-Options: DENY');
header('X-XSS-Protection: 1; mode=block');
header('Strict-Transport-Security: max-age=31536000; includeSubDomains');
header('Referrer-Policy: strict-origin-when-cross-origin');

// Article-specific SEO variables. Values are trimmed: stray padding inside
// the quotes leaks into rendered meta tags, the canonical URL and the
// JSON-LD payload.
$article_title       = 'Advanced SQL Analytics Techniques for Business Intelligence';
$article_description = 'Master advanced SQL techniques for complex analytics including window functions, CTEs, advanced joins, and optimization strategies for large-scale business intelligence.';
$article_keywords    = 'advanced SQL, SQL analytics, window functions, CTE, business intelligence SQL, SQL optimization, data analytics';
$article_author      = 'UK Data Services Analytics Team';
$canonical_url       = 'https://ukdataservices.co.uk/blog/articles/sql-analytics-advanced-techniques.php';
$article_published   = '2025-06-08T11:15:00+00:00'; // ISO 8601, also used in JSON-LD
$article_modified    = '2025-06-08T16:30:00+00:00';
$og_image            = 'https://ukdataservices.co.uk/assets/images/chart-icon.png';
$read_time           = 16; // minutes; single source of truth for "min read" and timeRequired
?>
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title><?php echo htmlspecialchars($article_title); ?> | UK Data Services Blog</title>
    <meta name="description" content="<?php echo htmlspecialchars($article_description); ?>">
    <meta name="keywords" content="<?php echo htmlspecialchars($article_keywords); ?>">
    <meta name="author" content="<?php echo htmlspecialchars($article_author); ?>">
    <meta name="robots" content="index, follow">
    <link rel="canonical" href="<?php echo htmlspecialchars($canonical_url); ?>">

    <!-- Article-specific meta tags -->
    <meta name="article:published_time" content="<?php echo htmlspecialchars($article_published); ?>">
    <meta name="article:modified_time" content="<?php echo htmlspecialchars($article_modified); ?>">
    <meta name="article:author" content="<?php echo htmlspecialchars($article_author); ?>">
    <meta name="article:section" content="Business Intelligence">
    <meta name="article:tag" content="SQL, Analytics, Data Science, Business Intelligence">

    <!-- Preload critical resources -->
    <link rel="preload" href="../../assets/css/main.css" as="style">
    <link rel="preload" href="../../assets/images/ukds-main-logo.png" as="image">

    <!-- Open Graph / Social Media -->
    <meta property="og:type" content="article">
    <meta property="og:url" content="<?php echo htmlspecialchars($canonical_url); ?>">
    <meta property="og:title" content="<?php echo htmlspecialchars($article_title); ?>">
    <meta property="og:description" content="<?php echo htmlspecialchars($article_description); ?>">
    <meta property="og:image" content="<?php echo htmlspecialchars($og_image); ?>">
    <meta property="og:image:width" content="1200">
    <meta property="og:image:height" content="630">
    <meta property="article:published_time" content="<?php echo htmlspecialchars($article_published); ?>">
    <meta property="article:modified_time" content="<?php echo htmlspecialchars($article_modified); ?>">
    <meta property="article:author" content="<?php echo htmlspecialchars($article_author); ?>">

    <!-- Twitter Card -->
    <meta name="twitter:card" content="summary_large_image">
    <meta name="twitter:title" content="<?php echo htmlspecialchars($article_title); ?>">
    <meta name="twitter:description" content="<?php echo htmlspecialchars($article_description); ?>">
    <meta name="twitter:image" content="<?php echo htmlspecialchars($og_image); ?>">

    <!-- Favicon and App Icons -->
    <link rel="icon" type="image/svg+xml" href="../../assets/images/favicon.svg">
    <link rel="apple-touch-icon" sizes="180x180" href="../../assets/images/apple-touch-icon.svg">

    <!-- Fonts -->
    <link rel="preconnect" href="https://fonts.googleapis.com">
    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
    <link href="https://fonts.googleapis.com/css2?family=Roboto+Slab:wght@300;400;500;600;700&family=Lato:wght@300;400;500;600;700&display=swap" rel="stylesheet">

    <!-- Styles -->
    <link rel="stylesheet" href="../../assets/css/main.css">

    <!-- Article Schema Markup. String values are emitted with json_encode,
         which escapes quotes/backslashes correctly for JSON; the previous
         htmlspecialchars() approach would have left HTML entities such as
         &quot; inside the structured data, corrupting it for crawlers. -->
    <script type="application/ld+json">
    {
        "@context": "https://schema.org",
        "@type": "Article",
        "headline": <?php echo json_encode($article_title); ?>,
        "description": <?php echo json_encode($article_description); ?>,
        "url": <?php echo json_encode($canonical_url); ?>,
        "datePublished": <?php echo json_encode($article_published); ?>,
        "dateModified": <?php echo json_encode($article_modified); ?>,
        "author": {
            "@type": "Organization",
            "name": <?php echo json_encode($article_author); ?>,
            "url": "https://ukdataservices.co.uk"
        },
        "publisher": {
            "@type": "Organization",
            "name": "UK Data Services",
            "logo": {
                "@type": "ImageObject",
                "url": "https://ukdataservices.co.uk/assets/images/ukds-main-logo.png",
                "width": 300,
                "height": 100
            }
        },
        "image": {
            "@type": "ImageObject",
            "url": <?php echo json_encode($og_image); ?>,
            "width": 1200,
            "height": 630
        },
        "mainEntityOfPage": {
            "@type": "WebPage",
            "@id": <?php echo json_encode($canonical_url); ?>
        },
        "articleSection": "Business Intelligence",
        "keywords": <?php echo json_encode($article_keywords); ?>,
        "wordCount": 4200,
        "timeRequired": "PT<?php echo (int) $read_time; ?>M",
        "inLanguage": "en-GB"
    }
    </script>
</head>
<body>
    <!-- Skip to content link for accessibility -->
    <a href="#main-content" class="skip-to-content">Skip to main content</a>

    <nav class="navbar scrolled" id="navbar" aria-label="Primary">
        <div class="nav-container">
            <div class="nav-logo">
                <a href="/">
                    <!-- width/height match the 300x100 logo declared in the
                         page's JSON-LD publisher block; reserving the space
                         prevents layout shift -->
                    <img src="../../assets/images/ukds-main-logo.png" alt="UK Data Services" class="logo" loading="eager" width="300" height="100">
                </a>
            </div>
            <div class="nav-menu" id="nav-menu">
                <a href="/" class="nav-link">Home</a>
                <a href="/#services" class="nav-link">Capabilities</a>
                <a href="/project-types" class="nav-link">Project Types</a>
                <a href="/about" class="nav-link">About</a>
                <a href="/blog/" class="nav-link active" aria-current="page">Blog</a>
                <a href="/#contact" class="nav-link">Contact</a>
                <a href="/quote" class="nav-link cta-button">Request Consultation</a>
            </div>
            <!-- Mobile menu toggle. NOTE(review): a native <button> would be
                 preferable; kept as a div with ARIA so existing .nav-toggle
                 CSS/JS keeps working — confirm the JS also handles keyboard
                 activation (Enter/Space). -->
            <div class="nav-toggle" id="nav-toggle" role="button" tabindex="0" aria-label="Toggle navigation" aria-expanded="false" aria-controls="nav-menu">
                <span class="bar"></span>
                <span class="bar"></span>
                <span class="bar"></span>
            </div>
        </div>
    </nav>
    <!-- Article Content -->
< main id = " main-content " >
< article class = " blog-article " >
< div class = " container " >
2025-06-09 05:47:40 +00:00
< div class = " article-meta " >
< span class = " category " >< a href = " /blog/categories/data-analytics.php " > Data analytics </ a ></ span >
< time datetime = " 2025-06-08 " > 8 June 2025 </ time >
< span class = " read-time " > 16 min read </ span >
</ div >
<!-- Article Header -->
2025-06-08 11:21:30 +01:00
< header class = " article-header " >
< h1 class = " article-title " >< ? php echo htmlspecialchars ( $article_title ); ?> </h1>
< p class = " article-subtitle " >< ? php echo htmlspecialchars ( $article_description ); ?> </p>
< div class = " article-author " >
< div class = " author-info " >
< strong > By < ? php echo htmlspecialchars ( $article_author ); ?> </strong>
< p > SQL analytics and database optimization specialists </ p >
</ div >
< div class = " article-share " >
< a href = " https://twitter.com/intent/tweet?text=<?php echo urlencode( $article_title ); ?>&url=<?php echo urlencode( $canonical_url ); ?> " target = " _blank " rel = " noopener " aria - label = " Share on Twitter " > 📤 Share </ a >
</ div >
</ div >
</ header >
<!-- Table of Contents -->
< nav class = " article-toc " >
< h2 > Table of Contents </ h2 >
< ol >
< li >< a href = " #window-functions " > Advanced Window Functions </ a ></ li >
< li >< a href = " #cte-recursive " > CTEs and Recursive Queries </ a ></ li >
< li >< a href = " #advanced-joins " > Complex Joins and Set Operations </ a ></ li >
< li >< a href = " #analytical-functions " > Analytical and Statistical Functions </ a ></ li >
< li >< a href = " #time-series-analysis " > Time Series Analysis in SQL </ a ></ li >
< li >< a href = " #performance-optimization " > Query Optimization Strategies </ a ></ li >
< li >< a href = " #data-quality " > Data Quality and Validation </ a ></ li >
< li >< a href = " #practical-examples " > Real - World Business Cases </ a ></ li >
</ ol >
</ nav >
<!-- Article Content -->
< div class = " article-content " >
< section id = " window-functions " >
< h2 > Advanced Window Functions </ h2 >
< p > Window functions are among the most powerful SQL features for analytics , enabling complex calculations across row sets without grouping restrictions . These functions provide elegant solutions for ranking , moving averages , percentiles , and comparative analysis essential for business intelligence .</ p >
< h3 > Ranking and Row Number Functions </ h3 >
< p > Ranking functions help identify top performers , outliers , and relative positioning within datasets :</ p >
< div class = " code-example " >
< h4 > Customer Revenue Ranking Example </ h4 >
< pre >< code >-- Calculate customer revenue rankings with ties handling
SELECT
customer_id ,
customer_name ,
total_revenue ,
ROW_NUMBER () OVER ( ORDER BY total_revenue DESC ) as row_num ,
RANK () OVER ( ORDER BY total_revenue DESC ) as rank_with_gaps ,
DENSE_RANK () OVER ( ORDER BY total_revenue DESC ) as dense_rank ,
NTILE ( 4 ) OVER ( ORDER BY total_revenue DESC ) as quartile ,
PERCENT_RANK () OVER ( ORDER BY total_revenue ) as percentile_rank
FROM customer_revenue_summary
WHERE date_year = 2024 ; </ code ></ pre >
</ div >
<div class="technique-explanation">
    <h4>Advanced Ranking Techniques</h4>
    <div class="technique">
        <h5>Conditional Ranking</h5>
        <!-- NOTE(review): the original example was truncated mid-CASE and a
             second <pre> opened inside it; reconstructed conservatively —
             confirm against the source draft. -->
        <pre><code>-- Rank customers within regions, with revenue threshold filtering
SELECT
    customer_id,
    region,
    total_revenue,
    CASE
        WHEN total_revenue >= 50000 THEN
            RANK() OVER (PARTITION BY region ORDER BY total_revenue DESC)
    END as high_value_regional_rank
FROM customer_revenue_summary
WHERE date_year = 2024;</code></pre>
    </div>
    <div class="technique">
        <h5>Moving Averages and Value Functions</h5>
        <pre><code>SELECT
    customer_id,
    transaction_date,
    daily_revenue,
    AVG(daily_revenue) OVER (
        ORDER BY transaction_date
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ) as seven_day_avg,
    LAG(daily_revenue, 1) OVER (ORDER BY transaction_date) as prev_day,
    LEAD(daily_revenue, 1) OVER (ORDER BY transaction_date) as next_day,
    FIRST_VALUE(daily_revenue) OVER (
        ORDER BY transaction_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) as first_revenue,
    LAST_VALUE(daily_revenue) OVER (
        ORDER BY transaction_date
        ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
    ) as last_revenue
FROM daily_customer_revenue
WHERE customer_id = 12345
ORDER BY transaction_date;</code></pre>
    </div>
</div>
< h3 > Advanced Frame Specifications </ h3 >
< p > Master different frame types for precise analytical calculations :</ p >
< div class = " frame-types " >
< div class = " frame-type " >
< h4 > ROWS vs RANGE Frame Types </ h4 >
< pre >< code >-- ROWS : Physical row - based frame ( faster , more predictable )
SELECT
order_date ,
daily_sales ,
SUM ( daily_sales ) OVER (
ORDER BY order_date
ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
) as five_day_sum_rows ,
-- RANGE : Logical value - based frame ( handles ties )
SUM ( daily_sales ) OVER (
ORDER BY order_date
RANGE BETWEEN INTERVAL '2' DAY PRECEDING
AND INTERVAL '2' DAY FOLLOWING
) as five_day_sum_range
FROM daily_sales_summary ; </ code ></ pre >
</ div >
< div class = " frame-type " >
< h4 > Dynamic Frame Boundaries </ h4 >
< pre >< code >-- Month - to - date and year - to - date calculations
SELECT
order_date ,
daily_sales ,
SUM ( daily_sales ) OVER (
PARTITION BY EXTRACT ( YEAR FROM order_date ),
EXTRACT ( MONTH FROM order_date )
ORDER BY order_date
ROWS UNBOUNDED PRECEDING
) as month_to_date ,
SUM ( daily_sales ) OVER (
PARTITION BY EXTRACT ( YEAR FROM order_date )
ORDER BY order_date
ROWS UNBOUNDED PRECEDING
) as year_to_date
FROM daily_sales_summary ; </ code ></ pre >
</ div >
</ div >
</ section >
< section id = " cte-recursive " >
< h2 > CTEs and Recursive Queries </ h2 >
< p > Common Table Expressions ( CTEs ) provide readable , maintainable approaches to complex queries . Recursive CTEs enable hierarchical data processing essential for organizational structures , product categories , and network analysis .</ p >
< h3 > Basic CTE Patterns </ h3 >
< p > Structure complex queries with multiple CTEs for clarity and reusability :</ p >
< div class = " cte-example " >
< h4 > Multi - CTE Customer Analysis </ h4 >
< pre >< code >-- Complex customer segmentation using multiple CTEs
WITH customer_metrics AS (
SELECT
customer_id ,
COUNT ( DISTINCT order_id ) as order_count ,
SUM ( order_total ) as total_revenue ,
AVG ( order_total ) as avg_order_value ,
MAX ( order_date ) as last_order_date ,
MIN ( order_date ) as first_order_date
FROM orders
WHERE order_date >= '2024-01-01'
GROUP BY customer_id
),
recency_scoring AS (
SELECT
customer_id ,
CASE
WHEN DATEDIFF ( day , last_order_date , GETDATE ()) <= 30 THEN 5
WHEN DATEDIFF ( day , last_order_date , GETDATE ()) <= 90 THEN 4
WHEN DATEDIFF ( day , last_order_date , GETDATE ()) <= 180 THEN 3
WHEN DATEDIFF ( day , last_order_date , GETDATE ()) <= 365 THEN 2
ELSE 1
END as recency_score
FROM customer_metrics
),
frequency_scoring AS (
SELECT
customer_id ,
NTILE ( 5 ) OVER ( ORDER BY order_count ) as frequency_score
FROM customer_metrics
),
monetary_scoring AS (
SELECT
customer_id ,
NTILE ( 5 ) OVER ( ORDER BY total_revenue ) as monetary_score
FROM customer_metrics
)
SELECT
cm . customer_id ,
cm . total_revenue ,
cm . order_count ,
cm . avg_order_value ,
rs . recency_score ,
fs . frequency_score ,
ms . monetary_score ,
( rs . recency_score + fs . frequency_score + ms . monetary_score ) as rfm_score ,
CASE
WHEN ( rs . recency_score + fs . frequency_score + ms . monetary_score ) >= 13 THEN 'Champions'
WHEN ( rs . recency_score + fs . frequency_score + ms . monetary_score ) >= 10 THEN 'Loyal Customers'
WHEN ( rs . recency_score + fs . frequency_score + ms . monetary_score ) >= 7 THEN 'Potential Loyalists'
WHEN ( rs . recency_score + fs . frequency_score + ms . monetary_score ) >= 5 THEN 'At Risk'
ELSE 'Lost Customers'
END as customer_segment
FROM customer_metrics cm
JOIN recency_scoring rs ON cm . customer_id = rs . customer_id
JOIN frequency_scoring fs ON cm . customer_id = fs . customer_id
JOIN monetary_scoring ms ON cm . customer_id = ms . customer_id ; </ code ></ pre >
</ div >
< h3 > Recursive CTEs for Hierarchical Data </ h3 >
< p > Handle organizational structures , category trees , and network analysis with recursive queries :</ p >
< div class = " recursive-examples " >
< div class = " recursive-example " >
< h4 > Organizational Hierarchy Analysis </ h4 >
< pre >< code >-- Calculate organization levels and reporting chains
WITH RECURSIVE org_hierarchy AS (
-- Anchor : Top - level executives
SELECT
employee_id ,
employee_name ,
manager_id ,
salary ,
1 as level ,
CAST ( employee_name as VARCHAR ( 1000 )) as hierarchy_path ,
employee_id as top_manager_id
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive : Add direct reports
SELECT
e . employee_id ,
e . employee_name ,
e . manager_id ,
e . salary ,
oh . level + 1 ,
oh . hierarchy_path + ' -> ' + e . employee_name ,
oh . top_manager_id
FROM employees e
INNER JOIN org_hierarchy oh ON e . manager_id = oh . employee_id
WHERE oh . level < 10 -- Prevent infinite recursion
)
SELECT
employee_id ,
employee_name ,
level ,
hierarchy_path ,
salary ,
AVG ( salary ) OVER ( PARTITION BY level ) as avg_salary_at_level ,
COUNT ( * ) OVER ( PARTITION BY top_manager_id ) as org_size
FROM org_hierarchy
ORDER BY top_manager_id , level , employee_name ; </ code ></ pre >
</ div >
< div class = " recursive-example " >
< h4 > Product Category Tree with Aggregations </ h4 >
< pre >< code >-- Recursive category analysis with sales rollups
WITH RECURSIVE category_tree AS (
-- Anchor : Root categories
SELECT
category_id ,
category_name ,
parent_category_id ,
1 as level ,
CAST ( category_id as VARCHAR ( 1000 )) as path
FROM product_categories
WHERE parent_category_id IS NULL
UNION ALL
-- Recursive : Child categories
SELECT
pc . category_id ,
pc . category_name ,
pc . parent_category_id ,
ct . level + 1 ,
ct . path + '/' + CAST ( pc . category_id as VARCHAR )
FROM product_categories pc
INNER JOIN category_tree ct ON pc . parent_category_id = ct . category_id
),
category_sales AS (
SELECT
ct . category_id ,
ct . category_name ,
ct . level ,
ct . path ,
COALESCE ( SUM ( s . sales_amount ), 0 ) as direct_sales ,
COUNT ( DISTINCT s . product_id ) as product_count
FROM category_tree ct
LEFT JOIN products p ON ct.category_id = p.category_id
LEFT JOIN sales s ON p.product_id = s.product_id
    -- Filter in the join condition: a WHERE clause on the left-joined table
    -- would silently turn this LEFT JOIN into an INNER JOIN and drop
    -- categories with no 2024 sales.
    AND s.sale_date >= '2024-01-01'
GROUP BY ct.category_id, ct.category_name, ct.level, ct.path
)
SELECT
category_id ,
category_name ,
level ,
REPLICATE ( ' ' , level - 1 ) + category_name as indented_name ,
direct_sales ,
product_count ,
-- Calculate total sales including subcategories
( SELECT SUM ( cs2 . direct_sales )
FROM category_sales cs2
WHERE cs2 . path LIKE cs1 . path + '%' ) as total_sales_with_children
FROM category_sales cs1
ORDER BY path ; </ code ></ pre >
</ div >
</ div >
</ section >
< section id = " advanced-joins " >
< h2 > Complex Joins and Set Operations </ h2 >
< p > Advanced join techniques and set operations enable sophisticated data analysis patterns essential for comprehensive business intelligence queries .</ p >
< h3 > Advanced Join Patterns </ h3 >
< p > Go beyond basic joins to handle complex analytical requirements :</ p >
< div class = " join-patterns " >
< div class = " join-pattern " >
< h4 > Self - Joins for Comparative Analysis </ h4 >
< pre >< code >-- Compare customer performance year - over - year
SELECT
current_year . customer_id ,
current_year . customer_name ,
current_year . total_revenue as revenue_2024 ,
previous_year . total_revenue as revenue_2023 ,
( current_year . total_revenue - COALESCE ( previous_year . total_revenue , 0 )) as revenue_change ,
CASE
WHEN previous_year . total_revenue > 0 THEN
(( current_year . total_revenue - previous_year . total_revenue )
/ previous_year . total_revenue ) * 100
ELSE NULL
END as growth_percentage
FROM (
SELECT customer_id , customer_name , SUM ( order_total ) as total_revenue
FROM orders o
JOIN customers c ON o . customer_id = c . customer_id
WHERE YEAR ( order_date ) = 2024
GROUP BY customer_id , customer_name
) current_year
LEFT JOIN (
SELECT customer_id , SUM ( order_total ) as total_revenue
FROM orders
WHERE YEAR ( order_date ) = 2023
GROUP BY customer_id
) previous_year ON current_year . customer_id = previous_year . customer_id ; </ code ></ pre >
</ div >
< div class = " join-pattern " >
< h4 > Lateral Joins for Correlated Subqueries </ h4 >
< pre >< code >-- Get top 3 products for each customer with lateral join
SELECT
c . customer_id ,
c . customer_name ,
tp . product_id ,
tp . product_name ,
tp . total_purchased ,
tp . rank_in_customer
FROM customers c
CROSS JOIN LATERAL (
SELECT
p . product_id ,
p . product_name ,
SUM ( oi . quantity ) as total_purchased ,
ROW_NUMBER () OVER ( ORDER BY SUM ( oi . quantity ) DESC ) as rank_in_customer
FROM orders o
JOIN order_items oi ON o . order_id = oi . order_id
JOIN products p ON oi . product_id = p . product_id
WHERE o . customer_id = c . customer_id
GROUP BY p . product_id , p . product_name
ORDER BY total_purchased DESC
LIMIT 3
) tp
WHERE c . customer_id IN ( SELECT customer_id FROM high_value_customers ); </ code ></ pre >
</ div >
</ div >
< h3 > Set Operations for Complex Analysis </ h3 >
< p > Combine result sets to identify patterns , gaps , and overlaps in business data :</ p >
< div class = " set-operations " >
< div class = " set-operation " >
< h4 > Customer Behavior Analysis with EXCEPT </ h4 >
< pre >< code >-- Find customers who purchased in 2023 but not in 2024
WITH customers_2023 AS (
SELECT DISTINCT customer_id
FROM orders
WHERE YEAR ( order_date ) = 2023
),
customers_2024 AS (
SELECT DISTINCT customer_id
FROM orders
WHERE YEAR ( order_date ) = 2024
),
churned_customers AS (
SELECT customer_id FROM customers_2023
EXCEPT
SELECT customer_id FROM customers_2024
)
SELECT
cc . customer_id ,
c . customer_name ,
c . email ,
last_order . last_order_date ,
last_order . last_order_total ,
lifetime_stats . total_orders ,
lifetime_stats . lifetime_value
FROM churned_customers cc
JOIN customers c ON cc . customer_id = c . customer_id
JOIN (
SELECT
customer_id ,
MAX ( order_date ) as last_order_date ,
MAX ( order_total ) as last_order_total
FROM orders
WHERE customer_id IN ( SELECT customer_id FROM churned_customers )
GROUP BY customer_id
) last_order ON cc . customer_id = last_order . customer_id
JOIN (
SELECT
customer_id ,
COUNT ( * ) as total_orders ,
SUM ( order_total ) as lifetime_value
FROM orders
WHERE customer_id IN ( SELECT customer_id FROM churned_customers )
GROUP BY customer_id
) lifetime_stats ON cc . customer_id = lifetime_stats . customer_id ; </ code ></ pre >
</ div >
< div class = " set-operation " >
< h4 > Product Affinity Analysis with INTERSECT </ h4 >
< pre >< code >-- Find products frequently bought together
WITH product_pairs AS (
SELECT
oi1 . product_id as product_a ,
oi2 . product_id as product_b ,
COUNT ( DISTINCT oi1 . order_id ) as co_purchase_count
FROM order_items oi1
JOIN order_items oi2 ON oi1 . order_id = oi2 . order_id
WHERE oi1 . product_id < oi2 . product_id -- Avoid duplicates and self - pairs
GROUP BY oi1 . product_id , oi2 . product_id
HAVING COUNT ( DISTINCT oi1 . order_id ) >= 5 -- Minimum co - purchases
),
product_stats AS (
SELECT
product_id ,
COUNT ( DISTINCT order_id ) as individual_purchase_count
FROM order_items
GROUP BY product_id
)
SELECT
pp . product_a ,
pa . product_name as product_a_name ,
pp . product_b ,
pb . product_name as product_b_name ,
pp . co_purchase_count ,
psa . individual_purchase_count as product_a_total ,
psb . individual_purchase_count as product_b_total ,
ROUND (
( pp . co_purchase_count * 1.0 / LEAST ( psa . individual_purchase_count , psb . individual_purchase_count )) * 100 ,
2
) as affinity_percentage
FROM product_pairs pp
JOIN products pa ON pp . product_a = pa . product_id
JOIN products pb ON pp . product_b = pb . product_id
JOIN product_stats psa ON pp . product_a = psa . product_id
JOIN product_stats psb ON pp . product_b = psb . product_id
ORDER BY affinity_percentage DESC , co_purchase_count DESC ; </ code ></ pre >
</ div >
</ div >
</ section >
< section id = " analytical-functions " >
< h2 > Analytical and Statistical Functions </ h2 >
< p > Modern SQL provides extensive statistical and analytical functions for advanced business intelligence without requiring external tools .</ p >
< h3 > Statistical Aggregates </ h3 >
< p > Calculate comprehensive statistics for business metrics :</ p >
< div class = " statistical-functions " >
< h4 > Comprehensive Revenue Analysis </ h4 >
< pre >< code >-- Advanced statistical analysis of revenue by region
SELECT
region ,
COUNT ( * ) as customer_count ,
-- Central tendency measures
AVG ( annual_revenue ) as mean_revenue ,
PERCENTILE_CONT ( 0.5 ) WITHIN GROUP ( ORDER BY annual_revenue ) as median_revenue ,
MODE () WITHIN GROUP ( ORDER BY annual_revenue ) as modal_revenue ,
-- Variability measures
STDDEV ( annual_revenue ) as revenue_stddev ,
VAR ( annual_revenue ) as revenue_variance ,
( STDDEV ( annual_revenue ) / AVG ( annual_revenue )) * 100 as coefficient_of_variation ,
-- Distribution measures
PERCENTILE_CONT ( 0.25 ) WITHIN GROUP ( ORDER BY annual_revenue ) as q1 ,
PERCENTILE_CONT ( 0.75 ) WITHIN GROUP ( ORDER BY annual_revenue ) as q3 ,
PERCENTILE_CONT ( 0.9 ) WITHIN GROUP ( ORDER BY annual_revenue ) as p90 ,
PERCENTILE_CONT ( 0.95 ) WITHIN GROUP ( ORDER BY annual_revenue ) as p95 ,
-- Range measures
MIN ( annual_revenue ) as min_revenue ,
MAX ( annual_revenue ) as max_revenue ,
MAX ( annual_revenue ) - MIN ( annual_revenue ) as revenue_range ,
-- Outlier detection ( IQR method )
PERCENTILE_CONT ( 0.75 ) WITHIN GROUP ( ORDER BY annual_revenue ) -
PERCENTILE_CONT ( 0.25 ) WITHIN GROUP ( ORDER BY annual_revenue ) as iqr ,
PERCENTILE_CONT ( 0.25 ) WITHIN GROUP ( ORDER BY annual_revenue ) -
1.5 * ( PERCENTILE_CONT ( 0.75 ) WITHIN GROUP ( ORDER BY annual_revenue ) -
PERCENTILE_CONT ( 0.25 ) WITHIN GROUP ( ORDER BY annual_revenue )) as lower_outlier_threshold ,
PERCENTILE_CONT ( 0.75 ) WITHIN GROUP ( ORDER BY annual_revenue ) +
1.5 * ( PERCENTILE_CONT ( 0.75 ) WITHIN GROUP ( ORDER BY annual_revenue ) -
PERCENTILE_CONT ( 0.25 ) WITHIN GROUP ( ORDER BY annual_revenue )) as upper_outlier_threshold
FROM customer_revenue_summary
WHERE year = 2024
GROUP BY region
ORDER BY mean_revenue DESC ; </ code ></ pre >
</ div >
< h3 > Correlation and Regression Analysis </ h3 >
< p > Identify relationships between business metrics using SQL :</ p >
< div class = " correlation-analysis " >
< h4 > Marketing Spend vs Revenue Correlation </ h4 >
< pre >< code >-- Calculate correlation between marketing spend and revenue
WITH monthly_metrics AS (
SELECT
DATE_TRUNC ( 'month' , metric_date ) as month ,
SUM ( marketing_spend ) as total_marketing_spend ,
SUM ( revenue ) as total_revenue ,
AVG ( customer_satisfaction_score ) as avg_satisfaction
FROM business_metrics
WHERE metric_date >= '2024-01-01'
GROUP BY DATE_TRUNC ( 'month' , metric_date )
),
correlation_prep AS (
SELECT
month ,
total_marketing_spend ,
total_revenue ,
avg_satisfaction ,
AVG ( total_marketing_spend ) OVER () as mean_marketing ,
AVG ( total_revenue ) OVER () as mean_revenue ,
AVG ( avg_satisfaction ) OVER () as mean_satisfaction ,
COUNT ( * ) OVER () as n
FROM monthly_metrics
)
SELECT
-- Pearson correlation coefficient for marketing spend vs revenue
SUM (( total_marketing_spend - mean_marketing ) * ( total_revenue - mean_revenue )) /
( SQRT ( SUM ( POWER ( total_marketing_spend - mean_marketing , 2 ))) *
SQRT ( SUM ( POWER ( total_revenue - mean_revenue , 2 )))) as marketing_revenue_correlation ,
-- Simple linear regression : revenue = a + b * marketing_spend
( n * SUM ( total_marketing_spend * total_revenue ) - SUM ( total_marketing_spend ) * SUM ( total_revenue )) /
( n * SUM ( POWER ( total_marketing_spend , 2 )) - POWER ( SUM ( total_marketing_spend ), 2 )) as regression_slope ,
( SUM ( total_revenue ) -
(( n * SUM ( total_marketing_spend * total_revenue ) - SUM ( total_marketing_spend ) * SUM ( total_revenue )) /
( n * SUM ( POWER ( total_marketing_spend , 2 )) - POWER ( SUM ( total_marketing_spend ), 2 ))) * SUM ( total_marketing_spend )) / n as regression_intercept ,
-- R-squared: column aliases (regression_slope / regression_intercept)
-- cannot be referenced in the same SELECT list, so use the equivalent
-- identity for simple linear regression: R² = (Pearson r)²
POWER(
    SUM((total_marketing_spend - mean_marketing) * (total_revenue - mean_revenue)) /
    (SQRT(SUM(POWER(total_marketing_spend - mean_marketing, 2))) *
     SQRT(SUM(POWER(total_revenue - mean_revenue, 2)))),
    2
) as r_squared
FROM correlation_prep ; </ code ></ pre >
</ div >
</ section >
< section id = " time-series-analysis " >
< h2 > Time Series Analysis in SQL </ h2 >
< p > Time series analysis capabilities in SQL enable trend analysis , seasonality detection , and forecasting essential for business planning .</ p >
< h3 > Trend Analysis and Decomposition </ h3 >
< p > Identify underlying trends and seasonal patterns in business data :</ p >
< div class = " time-series-example " >
< h4 > Sales Trend and Seasonality Analysis </ h4 >
< pre >< code >-- Comprehensive time series decomposition
WITH daily_sales AS (
SELECT
sale_date ,
SUM ( sale_amount ) as daily_revenue ,
EXTRACT ( DOW FROM sale_date ) as day_of_week ,
EXTRACT ( MONTH FROM sale_date ) as month ,
EXTRACT ( QUARTER FROM sale_date ) as quarter
FROM sales
WHERE sale_date >= '2023-01-01' AND sale_date <= '2024-12-31'
GROUP BY sale_date
),
moving_averages AS (
SELECT
sale_date ,
daily_revenue ,
day_of_week ,
month ,
quarter ,
-- Various moving averages for trend analysis
AVG ( daily_revenue ) OVER (
ORDER BY sale_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) as ma_7_day ,
AVG ( daily_revenue ) OVER (
ORDER BY sale_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) as ma_30_day ,
AVG ( daily_revenue ) OVER (
ORDER BY sale_date
ROWS BETWEEN 89 PRECEDING AND CURRENT ROW
) as ma_90_day ,
-- Exponential moving average ( approximate )
daily_revenue * 0.1 +
LAG ( daily_revenue , 1 , daily_revenue ) OVER ( ORDER BY sale_date ) * 0.9 as ema_approx
FROM daily_sales
),
seasonal_decomposition AS (
SELECT
sale_date ,
daily_revenue ,
ma_30_day as trend ,
daily_revenue - ma_30_day as detrended ,
-- Calculate seasonal component by day of week
AVG ( daily_revenue - ma_30_day ) OVER (
PARTITION BY day_of_week
) as seasonal_dow ,
-- Calculate seasonal component by month
AVG ( daily_revenue - ma_30_day ) OVER (
PARTITION BY month
) as seasonal_month ,
-- Residual component
daily_revenue - ma_30_day -
AVG ( daily_revenue - ma_30_day ) OVER ( PARTITION BY day_of_week ) as residual
FROM moving_averages
WHERE ma_30_day IS NOT NULL
)
SELECT
sale_date ,
daily_revenue ,
trend ,
seasonal_dow ,
seasonal_month ,
residual ,
-- Reconstruct the time series
trend + seasonal_dow + residual as reconstructed_value ,
-- Calculate percentage components
( seasonal_dow / daily_revenue ) * 100 as seasonal_dow_pct ,
( residual / daily_revenue ) * 100 as residual_pct ,
-- Trend direction indicators
CASE
WHEN trend > LAG ( trend , 7 ) OVER ( ORDER BY sale_date ) THEN 'Increasing'
WHEN trend < LAG ( trend , 7 ) OVER ( ORDER BY sale_date ) THEN 'Decreasing'
ELSE 'Stable'
END as trend_direction
FROM seasonal_decomposition
ORDER BY sale_date ; </ code ></ pre >
</ div >
< h3 > Advanced Time Series Functions </ h3 >
< p > Utilize specialized time series functions for sophisticated analysis :</ p >
< div class = " advanced-time-series " >
< h4 > Change Point Detection and Forecasting </ h4 >
< pre >< code >-- Detect significant changes in business metrics
WITH metric_changes AS (
SELECT
metric_date ,
revenue ,
LAG ( revenue , 1 ) OVER ( ORDER BY metric_date ) as prev_revenue ,
LAG ( revenue , 7 ) OVER ( ORDER BY metric_date ) as prev_week_revenue ,
LAG ( revenue , 30 ) OVER ( ORDER BY metric_date ) as prev_month_revenue ,
-- Percentage changes
CASE
WHEN LAG ( revenue , 1 ) OVER ( ORDER BY metric_date ) > 0 THEN
(( revenue - LAG ( revenue , 1 ) OVER ( ORDER BY metric_date )) /
LAG ( revenue , 1 ) OVER ( ORDER BY metric_date )) * 100
END as daily_change_pct ,
CASE
WHEN LAG ( revenue , 7 ) OVER ( ORDER BY metric_date ) > 0 THEN
(( revenue - LAG ( revenue , 7 ) OVER ( ORDER BY metric_date )) /
LAG ( revenue , 7 ) OVER ( ORDER BY metric_date )) * 100
END as weekly_change_pct ,
-- Rolling statistics for change point detection
AVG ( revenue ) OVER (
ORDER BY metric_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) as rolling_30_avg ,
STDDEV ( revenue ) OVER (
ORDER BY metric_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) as rolling_30_stddev
FROM daily_business_metrics
),
change_points AS (
SELECT
metric_date ,
revenue ,
daily_change_pct ,
weekly_change_pct ,
rolling_30_avg ,
rolling_30_stddev ,
-- Z - score for anomaly detection
CASE
WHEN rolling_30_stddev > 0 THEN
( revenue - rolling_30_avg ) / rolling_30_stddev
END as z_score ,
-- Flag significant changes
CASE
WHEN ABS ( daily_change_pct ) > 20 THEN 'Significant Daily Change'
WHEN ABS ( weekly_change_pct ) > 30 THEN 'Significant Weekly Change'
WHEN ABS (( revenue - rolling_30_avg ) / rolling_30_stddev ) > 2 THEN 'Statistical Anomaly'
ELSE 'Normal'
END as change_classification
FROM metric_changes
WHERE rolling_30_stddev IS NOT NULL
),
-- Simple linear trend for forecasting
trend_analysis AS (
SELECT
COUNT ( * ) as n ,
SUM ( EXTRACT ( DAY FROM metric_date )) as sum_x ,
SUM ( revenue ) as sum_y ,
SUM ( EXTRACT ( DAY FROM metric_date ) * revenue ) as sum_xy ,
SUM ( POWER ( EXTRACT ( DAY FROM metric_date ), 2 )) as sum_x2 ,
-- Linear regression coefficients
-- (use COUNT(*) directly: the column alias n defined in this SELECT
--  cannot be referenced by other expressions in the same SELECT list)
( COUNT ( * ) * SUM ( EXTRACT ( DAY FROM metric_date ) * revenue ) -
SUM ( EXTRACT ( DAY FROM metric_date )) * SUM ( revenue )) /
( COUNT ( * ) * SUM ( POWER ( EXTRACT ( DAY FROM metric_date ), 2 )) -
POWER ( SUM ( EXTRACT ( DAY FROM metric_date )), 2 )) as slope ,
( SUM ( revenue ) -
(( COUNT ( * ) * SUM ( EXTRACT ( DAY FROM metric_date ) * revenue ) -
SUM ( EXTRACT ( DAY FROM metric_date )) * SUM ( revenue )) /
( COUNT ( * ) * SUM ( POWER ( EXTRACT ( DAY FROM metric_date ), 2 )) -
POWER ( SUM ( EXTRACT ( DAY FROM metric_date )), 2 ))) * SUM ( EXTRACT ( DAY FROM metric_date ))) / COUNT ( * ) as intercept
FROM change_points
WHERE metric_date >= CURRENT_DATE - INTERVAL '90 days'
)
SELECT
cp . metric_date ,
cp . revenue ,
cp . change_classification ,
cp . z_score ,
-- Trend line
ta . intercept + ta . slope * EXTRACT ( DAY FROM cp . metric_date ) as trend_value ,
-- Simple forecast ( next 7 days )
ta . intercept + ta . slope * ( EXTRACT ( DAY FROM cp . metric_date ) + 7 ) as forecast_7_days
FROM change_points cp
CROSS JOIN trend_analysis ta
ORDER BY cp . metric_date ; </ code ></ pre >
</ div >
</ section >
< section id = " performance-optimization " >
< h2 > Query Optimization Strategies </ h2 >
< p > Advanced SQL analytics requires optimization techniques to handle large datasets efficiently while maintaining query readability and maintainability .</ p >
< h3 > Index Strategy for Analytics </ h3 >
< p > Design indexes specifically for analytical workloads :</ p >
< div class = " index-strategies " >
< div class = " index-strategy " >
< h4 > Composite Indexes for Window Functions </ h4 >
< pre >< code >-- Optimize window function queries with proper indexing
-- Index design for partition by + order by patterns
-- For queries with PARTITION BY customer_id ORDER BY order_date
CREATE INDEX idx_orders_customer_date_analytics ON orders (
customer_id , -- Partition column first
order_date , -- Order by column second
order_total -- Include frequently selected columns
);
-- For time series analysis queries
CREATE INDEX idx_sales_date_analytics ON sales (
sale_date , -- Primary ordering column
product_category , -- Common partition column
region -- Secondary partition column
) INCLUDE (
sale_amount , -- Avoid key lookups
quantity ,
customer_id
);
-- For ranking queries within categories
CREATE INDEX idx_products_category_ranking ON products (
category_id , -- Partition column
total_sales DESC -- Order by column with sort direction
) INCLUDE (
product_name ,
price ,
stock_level
); </ code ></ pre >
</ div >
< div class = " index-strategy " >
< h4 > Filtered Indexes for Specific Analytics </ h4 >
< pre >< code >-- Create filtered indexes for specific analytical scenarios
-- Index for active customers only
CREATE INDEX idx_orders_active_customers ON orders (
customer_id ,
order_date DESC
)
WHERE order_date >= DATEADD ( YEAR , - 2 , GETDATE ())
INCLUDE ( order_total , product_count );
-- Index for high - value transactions
CREATE INDEX idx_orders_high_value ON orders (
order_date ,
customer_id
)
WHERE order_total >= 1000
INCLUDE ( order_total , discount_amount );
-- Index for specific time periods ( quarterly analysis )
CREATE INDEX idx_sales_current_quarter ON sales (
product_id ,
sale_date
)
WHERE sale_date >= DATEADD ( QUARTER , DATEDIFF ( QUARTER , 0 , GETDATE ()), 0 )
INCLUDE ( sale_amount , quantity ); </ code ></ pre >
</ div >
</ div >
< h3 > Query Optimization Techniques </ h3 >
< p > Apply specific optimization patterns for complex analytical queries :</ p >
< div class = " optimization-techniques " >
< div class = " technique " >
< h4 > Avoiding Redundant Window Function Calculations </ h4 >
< pre >< code >-- INEFFICIENT : Multiple similar window function calls
SELECT
customer_id ,
order_date ,
order_total ,
SUM ( order_total ) OVER ( PARTITION BY customer_id ORDER BY order_date ) as running_total ,
AVG ( order_total ) OVER ( PARTITION BY customer_id ORDER BY order_date ) as running_avg ,
COUNT ( * ) OVER ( PARTITION BY customer_id ORDER BY order_date ) as running_count ,
MAX ( order_total ) OVER ( PARTITION BY customer_id ORDER BY order_date ) as running_max
FROM orders ;
-- EFFICIENT : Calculate once , derive others
WITH base_calculations AS (
SELECT
customer_id ,
order_date ,
order_total ,
SUM ( order_total ) OVER ( PARTITION BY customer_id ORDER BY order_date ) as running_total ,
COUNT ( * ) OVER ( PARTITION BY customer_id ORDER BY order_date ) as running_count ,
MAX ( order_total ) OVER ( PARTITION BY customer_id ORDER BY order_date ) as running_max
FROM orders
)
SELECT
customer_id ,
order_date ,
order_total ,
running_total ,
running_total / running_count as running_avg , -- Derive from existing calculations
running_count ,
running_max
FROM base_calculations ; </ code ></ pre >
</ div >
< div class = " technique " >
< h4 > Optimizing Large Aggregations </ h4 >
< pre >< code >-- Use materialized views for frequently accessed aggregations
CREATE MATERIALIZED VIEW mv_customer_monthly_stats AS
SELECT
customer_id ,
DATE_TRUNC ( 'month' , order_date ) as order_month ,
COUNT ( * ) as order_count ,
SUM ( order_total ) as total_revenue ,
AVG ( order_total ) as avg_order_value ,
MAX ( order_date ) as last_order_date
FROM orders
GROUP BY customer_id , DATE_TRUNC ( 'month' , order_date );
-- Create appropriate indexes on materialized view
CREATE INDEX idx_mv_customer_monthly_customer_month
ON mv_customer_monthly_stats ( customer_id , order_month );
-- Use partitioning for very large fact tables
CREATE TABLE sales_partitioned (
sale_id BIGINT ,
sale_date DATE ,
customer_id INT ,
product_id INT ,
sale_amount DECIMAL ( 10 , 2 ),
region VARCHAR ( 50 )
)
PARTITION BY RANGE ( sale_date ) (
PARTITION p2023 VALUES LESS THAN ( '2024-01-01' ),
PARTITION p2024_q1 VALUES LESS THAN ( '2024-04-01' ),
PARTITION p2024_q2 VALUES LESS THAN ( '2024-07-01' ),
PARTITION p2024_q3 VALUES LESS THAN ( '2024-10-01' ),
PARTITION p2024_q4 VALUES LESS THAN ( '2025-01-01' )
); </ code ></ pre >
</ div >
</ div >
</ section >
< section id = " data-quality " >
< h2 > Data Quality and Validation </ h2 >
< p > Robust data quality checks ensure analytical results are reliable and trustworthy . Implement comprehensive validation within your SQL analytics workflows .</ p >
< h3 > Comprehensive Data Quality Framework </ h3 >
< p > Build systematic data quality checks into analytical processes :</ p >
< div class = " data-quality-example " >
< h4 > Multi - Dimensional Data Quality Assessment </ h4 >
< pre >< code >-- Comprehensive data quality assessment query
WITH data_quality_metrics AS (
SELECT
'orders' as table_name ,
COUNT ( * ) as total_records ,
-- Completeness checks
COUNT ( * ) - COUNT ( customer_id ) as missing_customer_id ,
COUNT ( * ) - COUNT ( order_date ) as missing_order_date ,
COUNT ( * ) - COUNT ( order_total ) as missing_order_total ,
-- Validity checks
SUM ( CASE WHEN order_total < 0 THEN 1 ELSE 0 END ) as negative_amounts ,
SUM ( CASE WHEN order_date > CURRENT_DATE THEN 1 ELSE 0 END ) as future_dates ,
SUM ( CASE WHEN order_date < '2020-01-01' THEN 1 ELSE 0 END ) as very_old_dates ,
-- Consistency checks
SUM ( CASE WHEN order_total != (
SELECT SUM ( oi . quantity * oi . unit_price )
FROM order_items oi
WHERE oi . order_id = o . order_id
) THEN 1 ELSE 0 END ) as inconsistent_totals ,
-- Uniqueness checks
COUNT ( * ) - COUNT ( DISTINCT order_id ) as duplicate_order_ids ,
-- Range checks
SUM ( CASE WHEN order_total > 10000 THEN 1 ELSE 0 END ) as potentially_high_amounts ,
-- Statistical outliers ( using IQR method )
PERCENTILE_CONT ( 0.75 ) WITHIN GROUP ( ORDER BY order_total ) as q3 ,
PERCENTILE_CONT ( 0.25 ) WITHIN GROUP ( ORDER BY order_total ) as q1 ,
PERCENTILE_CONT ( 0.75 ) WITHIN GROUP ( ORDER BY order_total ) -
PERCENTILE_CONT ( 0.25 ) WITHIN GROUP ( ORDER BY order_total ) as iqr
FROM orders o
WHERE order_date >= '2024-01-01'
),
quality_summary AS (
SELECT
table_name ,
total_records ,
-- Calculate quality percentages
ROUND (( 1.0 - ( missing_customer_id * 1.0 / total_records )) * 100 , 2 ) as customer_id_completeness ,
ROUND (( 1.0 - ( missing_order_date * 1.0 / total_records )) * 100 , 2 ) as order_date_completeness ,
ROUND (( 1.0 - ( missing_order_total * 1.0 / total_records )) * 100 , 2 ) as order_total_completeness ,
ROUND (( 1.0 - ( negative_amounts * 1.0 / total_records )) * 100 , 2 ) as amount_validity ,
ROUND (( 1.0 - ( future_dates * 1.0 / total_records )) * 100 , 2 ) as date_validity ,
ROUND (( 1.0 - ( inconsistent_totals * 1.0 / total_records )) * 100 , 2 ) as total_consistency ,
ROUND (( 1.0 - ( duplicate_order_ids * 1.0 / total_records )) * 100 , 2 ) as id_uniqueness ,
-- Outlier detection
q1 - 1.5 * iqr as lower_outlier_threshold ,
q3 + 1.5 * iqr as upper_outlier_threshold ,
-- Overall quality score ( weighted average )
ROUND ((
( 1.0 - ( missing_customer_id * 1.0 / total_records )) * 0.2 +
( 1.0 - ( missing_order_date * 1.0 / total_records )) * 0.2 +
( 1.0 - ( missing_order_total * 1.0 / total_records )) * 0.2 +
( 1.0 - ( negative_amounts * 1.0 / total_records )) * 0.15 +
( 1.0 - ( future_dates * 1.0 / total_records )) * 0.1 +
( 1.0 - ( inconsistent_totals * 1.0 / total_records )) * 0.1 +
( 1.0 - ( duplicate_order_ids * 1.0 / total_records )) * 0.05
) * 100 , 2 ) as overall_quality_score
FROM data_quality_metrics
)
SELECT
table_name ,
total_records ,
customer_id_completeness || '%' as customer_id_completeness ,
order_date_completeness || '%' as order_date_completeness ,
order_total_completeness || '%' as order_total_completeness ,
amount_validity || '%' as amount_validity ,
date_validity || '%' as date_validity ,
total_consistency || '%' as total_consistency ,
id_uniqueness || '%' as id_uniqueness ,
overall_quality_score || '%' as overall_quality_score ,
CASE
WHEN overall_quality_score >= 95 THEN 'Excellent'
WHEN overall_quality_score >= 90 THEN 'Good'
WHEN overall_quality_score >= 80 THEN 'Acceptable'
WHEN overall_quality_score >= 70 THEN 'Poor'
ELSE 'Critical'
END as quality_rating
FROM quality_summary ; </ code ></ pre >
</ div >
< h3 > Automated Data Quality Monitoring </ h3 >
< p > Implement ongoing data quality monitoring with automated alerts :</ p >
< div class = " monitoring-example " >
< h4 > Daily Data Quality Dashboard </ h4 >
< pre >< code >-- Create automated data quality monitoring
CREATE OR REPLACE VIEW daily_data_quality_dashboard AS
WITH daily_metrics AS (
SELECT
CURRENT_DATE as check_date ,
'daily_sales' as table_name ,
-- Volume checks
COUNT ( * ) as record_count ,
COUNT ( * ) - LAG ( COUNT ( * ), 1 ) OVER ( ORDER BY DATE ( created_at )) as volume_change ,
-- Completeness monitoring
COUNT ( CASE WHEN sale_amount IS NULL THEN 1 END ) as missing_amounts ,
COUNT ( CASE WHEN customer_id IS NULL THEN 1 END ) as missing_customers ,
-- Freshness checks
MAX ( created_at ) as latest_record ,
EXTRACT ( HOUR FROM ( CURRENT_TIMESTAMP - MAX ( created_at ))) as hours_since_latest ,
-- Business rule validation
COUNT ( CASE WHEN sale_amount <= 0 THEN 1 END ) as invalid_amounts ,
COUNT ( CASE WHEN sale_date > CURRENT_DATE THEN 1 END ) as future_sales ,
-- Statistical monitoring
AVG ( sale_amount ) as avg_sale_amount ,
STDDEV ( sale_amount ) as stddev_sale_amount
FROM sales
WHERE DATE ( created_at ) = CURRENT_DATE
GROUP BY DATE ( created_at )
),
quality_alerts AS (
SELECT
* ,
CASE
WHEN ABS ( volume_change ) > ( record_count * 0.2 ) THEN 'Volume Alert: >20% change'
WHEN missing_amounts > ( record_count * 0.05 ) THEN 'Completeness Alert: >5% missing amounts'
WHEN hours_since_latest > 2 THEN 'Freshness Alert: Data older than 2 hours'
WHEN invalid_amounts > 0 THEN 'Validity Alert: Invalid amounts detected'
WHEN future_sales > 0 THEN 'Logic Alert: Future sales detected'
ELSE 'No alerts'
END as alert_status ,
CASE
WHEN hours_since_latest > 4 OR invalid_amounts > ( record_count * 0.1 ) THEN 'Critical'
WHEN ABS ( volume_change ) > ( record_count * 0.2 ) OR missing_amounts > ( record_count * 0.05 ) THEN 'Warning'
ELSE 'Normal'
END as severity_level
FROM daily_metrics
)
SELECT
check_date ,
table_name ,
record_count ,
volume_change ,
ROUND (( 1.0 - missing_amounts * 1.0 / record_count ) * 100 , 2 ) as amount_completeness_pct ,
hours_since_latest ,
invalid_amounts ,
alert_status ,
severity_level ,
-- Quality score calculation
CASE
WHEN severity_level = 'Critical' THEN 0
WHEN severity_level = 'Warning' THEN 70
ELSE 100
END as daily_quality_score
FROM quality_alerts ; </ code ></ pre >
</ div >
</ section >
< section id = " practical-examples " >
< h2 > Real - World Business Cases </ h2 >
< p > Apply advanced SQL techniques to solve complex business problems across different industries and use cases .</ p >
< h3 > Customer Lifetime Value Analysis </ h3 >
< p > Calculate sophisticated CLV metrics using advanced SQL patterns :</ p >
< div class = " clv-example " >
< h4 > Predictive Customer Lifetime Value </ h4 >
< pre >< code >-- Advanced CLV calculation with cohort analysis and predictive elements
WITH customer_cohorts AS (
SELECT
customer_id ,
MIN ( order_date ) as first_order_date ,
DATE_TRUNC ( 'month' , MIN ( order_date )) as cohort_month
FROM orders
GROUP BY customer_id
),
monthly_customer_activity AS (
SELECT
c . customer_id ,
c . cohort_month ,
DATE_TRUNC ( 'month' , o . order_date ) as activity_month ,
EXTRACT ( EPOCH FROM ( DATE_TRUNC ( 'month' , o . order_date ) - c . cohort_month )) /
EXTRACT ( EPOCH FROM INTERVAL '1 month' ) as period_number ,
COUNT ( DISTINCT o . order_id ) as orders_count ,
SUM ( o . order_total ) as revenue ,
AVG ( o . order_total ) as avg_order_value
FROM customer_cohorts c
JOIN orders o ON c . customer_id = o . customer_id
GROUP BY c . customer_id , c . cohort_month , DATE_TRUNC ( 'month' , o . order_date )
),
retention_rates AS (
SELECT
cohort_month ,
period_number ,
COUNT ( DISTINCT customer_id ) as customers_active ,
FIRST_VALUE ( COUNT ( DISTINCT customer_id )) OVER (
PARTITION BY cohort_month
ORDER BY period_number
) as cohort_size ,
COUNT ( DISTINCT customer_id ) * 1.0 /
FIRST_VALUE ( COUNT ( DISTINCT customer_id )) OVER (
PARTITION BY cohort_month
ORDER BY period_number
) as retention_rate
FROM monthly_customer_activity
GROUP BY cohort_month , period_number
),
customer_metrics AS (
SELECT
c . customer_id ,
c . cohort_month ,
COUNT ( DISTINCT mca . activity_month ) as active_months ,
SUM ( mca . revenue ) as total_revenue ,
AVG ( mca . revenue ) as avg_monthly_revenue ,
MAX ( mca . activity_month ) as last_active_month ,
-- Calculate customer age in months
EXTRACT ( EPOCH FROM ( COALESCE ( MAX ( mca . activity_month ), CURRENT_DATE ) - c . cohort_month )) /
EXTRACT ( EPOCH FROM INTERVAL '1 month' ) as customer_age_months ,
-- Historical CLV ( actual )
SUM ( mca . revenue ) as historical_clv ,
-- Frequency and monetary components
COUNT ( DISTINCT mca . activity_month ) * 1.0 /
NULLIF ( EXTRACT ( EPOCH FROM ( MAX ( mca . activity_month ) - c . cohort_month )) /
EXTRACT ( EPOCH FROM INTERVAL '1 month' ), 0 ) as purchase_frequency ,
SUM ( mca . revenue ) / NULLIF ( COUNT ( DISTINCT mca . activity_month ), 0 ) as avg_revenue_per_active_month
FROM customer_cohorts c
LEFT JOIN monthly_customer_activity mca ON c . customer_id = mca . customer_id
GROUP BY c . customer_id , c . cohort_month
),
predictive_clv AS (
SELECT
cm .* ,
-- Get cohort - level retention curve
COALESCE ( AVG ( rr . retention_rate ) OVER (
PARTITION BY cm . cohort_month
), 0.1 ) as avg_cohort_retention ,
-- Predictive CLV calculation
-- Formula : ( Average Monthly Revenue × Purchase Frequency × Gross Margin ) / ( 1 + Discount Rate - Retention Rate )
CASE
WHEN avg_cohort_retention > 0 AND avg_cohort_retention < 1 THEN
( COALESCE ( avg_revenue_per_active_month , 0 ) *
COALESCE ( purchase_frequency , 0 ) *
0.3 ) / -- Assuming 30 % gross margin
( 1 + 0.01 - avg_cohort_retention ) -- 1 % monthly discount rate
ELSE historical_clv
END as predicted_clv ,
-- Risk segmentation
CASE
WHEN EXTRACT ( EPOCH FROM ( CURRENT_DATE - last_active_month )) /
EXTRACT ( EPOCH FROM INTERVAL '1 month' ) > 6 THEN 'High Risk'
WHEN EXTRACT ( EPOCH FROM ( CURRENT_DATE - last_active_month )) /
EXTRACT ( EPOCH FROM INTERVAL '1 month' ) > 3 THEN 'Medium Risk'
WHEN last_active_month >= CURRENT_DATE - INTERVAL '1 month' THEN 'Active'
ELSE 'Inactive'
END as customer_status ,
-- Value tier classification
NTILE ( 5 ) OVER ( ORDER BY historical_clv ) as value_quintile
FROM customer_metrics cm
LEFT JOIN retention_rates rr ON cm . cohort_month = rr . cohort_month
AND ROUND ( cm . customer_age_months ) = rr . period_number
)
SELECT
customer_id ,
cohort_month ,
customer_status ,
value_quintile ,
active_months ,
customer_age_months ,
ROUND ( total_revenue , 2 ) as historical_clv ,
ROUND ( predicted_clv , 2 ) as predicted_clv ,
ROUND ( avg_revenue_per_active_month , 2 ) as avg_monthly_revenue ,
ROUND ( purchase_frequency , 3 ) as purchase_frequency ,
ROUND ( avg_cohort_retention , 3 ) as cohort_retention_rate ,
-- Strategic recommendations
CASE
WHEN customer_status = 'Active' AND value_quintile >= 4 THEN 'VIP Program'
WHEN customer_status = 'Active' AND value_quintile = 3 THEN 'Loyalty Program'
WHEN customer_status = 'Medium Risk' AND value_quintile >= 3 THEN 'Retention Campaign'
WHEN customer_status = 'High Risk' AND value_quintile >= 3 THEN 'Win-Back Campaign'
WHEN customer_status = 'Inactive' THEN 'Re-engagement Required'
ELSE 'Standard Marketing'
END as recommended_action
FROM predictive_clv
WHERE predicted_clv > 0
ORDER BY predicted_clv DESC ; </ code ></ pre >
</ div >
< div class = " expert-consultation-cta " >
< h3 > Need Advanced SQL Analytics Support ? </ h3 >
< p > Our database specialists can help you implement sophisticated SQL analytics solutions that scale with your business requirements .</ p >
< a href = " ../../quote.php?service=sql-analytics " class = " btn btn-primary " > Get SQL Analytics Consultation </ a >
</ div >
</ section >
</ div >
<!-- Related Articles -->
< section class = " related-articles " >
< h2 > Related Articles </ h2 >
< div class = " related-grid " >
< article class = " related-card " >
< h3 >< a href = " business-intelligence-dashboard-design.php " > BI Dashboard Design Best Practices </ a ></ h3 >
< p > Transform your SQL analytics into compelling visual dashboards for business users .</ p >
< span class = " read-time " > 12 min read </ span >
</article>
<article class="related-card">
< h3 >< a href = " ../categories/business-intelligence.php " > More Business Intelligence Articles </ a ></ h3 >
< p > Explore our complete collection of business intelligence and data analytics resources .</ p >
< span class = " read-time " > Browse category </ span >
</article>
<article class="related-card">
<h3><a href="/case-studies/">SQL Analytics Success Stories</a></h3>
< p > See real - world examples of advanced SQL implementations across different industries .</ p >
< span class = " read-time " > Multiple studies </ span >
</article>
</div>
</ section >
</ div >
<?php include($_SERVER['DOCUMENT_ROOT'] . '/includes/author-bio.php'); ?>
<?php include($_SERVER['DOCUMENT_ROOT'] . '/includes/article-footer.php'); ?>
</ div >
</ article >
<!-- CTA Section -->
< section class = " cta " >
< div class = " container " >
< div class = " cta-content " >
< h2 > Need Expert SQL Analytics Services ? </ h2 >
< p > Our data engineering team builds high - performance SQL solutions that unlock insights from your business data .</ p >
< div class = " cta-buttons " >
<a href="/quote" class="btn btn-primary">Get Free Consultation</a>
<a href="/#services" class="btn btn-secondary">Explore Data Services</a>
</ div >
</ div >
</ div >
</ section >
</ main >
<!-- Footer -->
< footer class = " footer " >
< div class = " container " >
< div class = " footer-content " >
< div class = " footer-section " >
< div class = " footer-logo " >
< img src = " ../../assets/images/logo-white.svg " alt = " UK Data Services " loading = " lazy " >
</ div >
< p > Enterprise data intelligence solutions for modern British business . Transform your operations with accurate , actionable insights and regulatory - compliant data services .</ p >
</ div >
< div class = " footer-section " >
< h3 > Analytics Services </ h3 >
< ul >
<li><a href="/#services">SQL Analytics</a></li>
<li><a href="/#services">Database Optimization</a></li>
<li><a href="/#services">Business Intelligence</a></li>
<li><a href="/#services">Data Engineering</a></li>
<li><a href="/#services">Performance Tuning</a></li>
</ ul >
</ div >
< div class = " footer-section " >
< h3 > Resources </ h3 >
< ul >
<li><a href="/">SQL Analytics Blog</a></li>
<li><a href="/case-studies/">Case Studies</a></li>
<li><a href="/about">About UK Data Services</a></li>
<li><a href="/project-types">Project Types</a></li>
<li><a href="/faq">FAQ</a></li>
<li><a href="/quote">Request Consultation</a></li>
</ ul >
</ div >
< div class = " footer-section " >
< h3 > Legal & Support </ h3 >
< ul >
<li><a href="/privacy-policy">Privacy Policy</a></li>
<li><a href="/terms-of-service">Terms of Service</a></li>
<li><a href="/cookie-policy">Cookie Policy</a></li>
<li><a href="/gdpr-compliance">GDPR Compliance</a></li>
<li><a href="/#contact">Contact &amp; Support</a></li>
</ ul >
</ div >
</ div >
< div class = " footer-bottom " >
< p >& copy ; < ? php echo date ( 'Y' ); ?> UK Data Services. All rights reserved.</p>
< div class = " social-links " >
<a href="https://linkedin.com/company/uk-data-services" aria-label="LinkedIn" rel="noopener" target="_blank">
< img src = " ../../assets/images/icon-linkedin.svg " alt = " LinkedIn " loading = " lazy " >
</ a >
< a href = " https://twitter.com/ukdataservices " aria - label = " Twitter " rel = " noopener " target = " _blank " >
< img src = " ../../assets/images/icon-twitter.svg " alt = " Twitter " loading = " lazy " >
</ a >
</ div >
</ div >
</ div >
</ footer >
<!-- Scripts -->
< script src = " ../../assets/js/main.js " ></ script >
<!-- Article - specific functionality -->
< script >
document.addEventListener('DOMContentLoaded', function () {
    // --- Table of contents navigation -------------------------------------
    // Smooth-scroll to the target section, offset so the fixed header does
    // not cover the section heading.
    const tocLinks = document.querySelectorAll('.article-toc a');
    tocLinks.forEach(link => {
        link.addEventListener('click', function (e) {
            e.preventDefault();
            const targetSection = document.querySelector(this.getAttribute('href'));
            if (targetSection) {
                const headerOffset = 100; // px height of the fixed site header
                const offsetPosition =
                    targetSection.getBoundingClientRect().top + window.pageYOffset - headerOffset;
                window.scrollTo({ top: offsetPosition, behavior: 'smooth' });
            }
        });
    });

    // --- Reading progress indicator ---------------------------------------
    // Fixed bar at the top of the viewport whose width tracks how far the
    // reader has scrolled through the article body.
    // FIX: stray VCS timestamp lines had been injected into this template
    // literal, corrupting the CSS (the parser dropped the declarations that
    // followed each junk line, e.g. `top: 0` and `z-index`).
    const article = document.querySelector('.article-content');
    const progressBar = document.createElement('div');
    progressBar.className = 'reading-progress';
    progressBar.style.cssText = `
        position: fixed;
        top: 0;
        left: 0;
        width: 0%;
        height: 3px;
        background: linear-gradient(90deg, #179e83, #144784);
        z-index: 1000;
        transition: width 0.3s ease;
    `;
    document.body.appendChild(progressBar);

    function updateReadingProgress() {
        // Guard: pages without an .article-content section would otherwise
        // throw on every scroll event.
        if (!article) return;
        const scrolled = Math.max(0, -article.getBoundingClientRect().top);
        const scrollable = article.offsetHeight - window.innerHeight;
        const progress = Math.min(100, (scrolled / scrollable) * 100);
        progressBar.style.width = progress + '%';
    }

    // passive: true — the handler never calls preventDefault(), so let the
    // browser keep scrolling off the main thread.
    window.addEventListener('scroll', updateReadingProgress, { passive: true });
    updateReadingProgress();
});
</ script >
</ body >
</ html >