<?php

declare(strict_types=1);

// Enhanced security headers.
// These MUST execute before any byte of output reaches the client —
// the stray text that previously preceded this tag would have triggered
// "headers already sent" warnings and silently dropped every header below.
header('X-Content-Type-Options: nosniff');
header('X-Frame-Options: DENY');
// NOTE(review): X-XSS-Protection is deprecated and ignored by modern browsers;
// kept for legacy user agents — a Content-Security-Policy is the modern replacement.
header('X-XSS-Protection: 1; mode=block');
header('Strict-Transport-Security: max-age=31536000; includeSubDomains');
header('Referrer-Policy: strict-origin-when-cross-origin');

// Article-specific SEO variables consumed by the template below
// (meta tags, Open Graph, Twitter Card, and JSON-LD schema markup).
$article_title       = 'Advanced SQL Analytics Techniques for Business Intelligence';
$article_description = 'Master advanced SQL techniques for complex analytics including window functions, CTEs, advanced joins, and optimization strategies for large-scale business intelligence.';
$article_keywords    = 'advanced SQL, SQL analytics, window functions, CTE, business intelligence SQL, SQL optimization, data analytics';
$article_author      = 'UK Data Services Analytics Team';
$canonical_url       = 'https://ukdataservices.co.uk/blog/articles/sql-analytics-advanced-techniques.php';
$article_published   = '2025-06-08T11:15:00+00:00'; // ISO-8601; echoed raw into meta tags and JSON-LD
$article_modified    = '2025-06-08T16:30:00+00:00';
$og_image            = 'https://ukdataservices.co.uk/assets/images/chart-icon.png';
$read_time           = 16; // minutes; rendered as "PT16M" ISO-8601 duration in the schema markup
?>
|
||
<!DOCTYPE html>
|
||
<html lang="en">
|
||
<head>
|
||
<meta charset="UTF-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
<title><?php echo htmlspecialchars($article_title); ?> | UK Data Services Blog</title>
|
||
<meta name="description" content="<?php echo htmlspecialchars($article_description); ?>">
|
||
<meta name="keywords" content="<?php echo htmlspecialchars($article_keywords); ?>">
|
||
<meta name="author" content="<?php echo htmlspecialchars($article_author); ?>">
|
||
<meta name="robots" content="index, follow">
|
||
<link rel="canonical" href="<?php echo htmlspecialchars($canonical_url); ?>">
|
||
|
||
<!-- Article-specific meta tags -->
|
||
    <meta property="article:published_time" content="<?php echo $article_published; ?>">
    <meta property="article:modified_time" content="<?php echo $article_modified; ?>">
    <meta property="article:author" content="<?php echo htmlspecialchars($article_author); ?>">
    <meta property="article:section" content="Business Intelligence">
    <meta property="article:tag" content="SQL, Analytics, Data Science, Business Intelligence">
|
||
|
||
<!-- Preload critical resources -->
|
||
<link rel="preload" href="../../assets/css/main.css" as="style">
|
||
<link rel="preload" href="../../assets/images/ukds-main-logo.png" as="image">
|
||
|
||
<!-- Open Graph / Social Media -->
|
||
<meta property="og:type" content="article">
|
||
<meta property="og:url" content="<?php echo htmlspecialchars($canonical_url); ?>">
|
||
<meta property="og:title" content="<?php echo htmlspecialchars($article_title); ?>">
|
||
<meta property="og:description" content="<?php echo htmlspecialchars($article_description); ?>">
|
||
<meta property="og:image" content="<?php echo htmlspecialchars($og_image); ?>">
|
||
<meta property="og:image:width" content="1200">
|
||
<meta property="og:image:height" content="630">
|
||
<meta property="article:published_time" content="<?php echo $article_published; ?>">
|
||
<meta property="article:modified_time" content="<?php echo $article_modified; ?>">
|
||
<meta property="article:author" content="<?php echo htmlspecialchars($article_author); ?>">
|
||
|
||
<!-- Twitter Card -->
|
||
<meta name="twitter:card" content="summary_large_image">
|
||
<meta name="twitter:title" content="<?php echo htmlspecialchars($article_title); ?>">
|
||
<meta name="twitter:description" content="<?php echo htmlspecialchars($article_description); ?>">
|
||
<meta name="twitter:image" content="<?php echo htmlspecialchars($og_image); ?>">
|
||
|
||
<!-- Favicon and App Icons -->
|
||
<link rel="icon" type="image/svg+xml" href="../../assets/images/favicon.svg">
|
||
<link rel="apple-touch-icon" sizes="180x180" href="../../assets/images/apple-touch-icon.svg">
|
||
|
||
<!-- Fonts -->
|
||
<link rel="preconnect" href="https://fonts.googleapis.com">
|
||
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
||
<link href="https://fonts.googleapis.com/css2?family=Roboto+Slab:wght@300;400;500;600;700&family=Lato:wght@300;400;500;600;700&display=swap" rel="stylesheet">
|
||
|
||
<!-- Styles -->
|
||
<link rel="stylesheet" href="../../assets/css/main.css">
|
||
|
||
<!-- Article Schema Markup -->
|
||
<script type="application/ld+json">
|
||
{
|
||
"@context": "https://schema.org",
|
||
"@type": "Article",
|
||
"headline": "<?php echo htmlspecialchars($article_title); ?>",
|
||
"description": "<?php echo htmlspecialchars($article_description); ?>",
|
||
"url": "<?php echo htmlspecialchars($canonical_url); ?>",
|
||
"datePublished": "<?php echo $article_published; ?>",
|
||
"dateModified": "<?php echo $article_modified; ?>",
|
||
"author": {
|
||
"@type": "Organization",
|
||
"name": "<?php echo htmlspecialchars($article_author); ?>",
|
||
"url": "https://ukdataservices.co.uk"
|
||
},
|
||
"publisher": {
|
||
"@type": "Organization",
|
||
"name": "UK Data Services",
|
||
"logo": {
|
||
"@type": "ImageObject",
|
||
"url": "https://ukdataservices.co.uk/assets/images/ukds-main-logo.png",
|
||
"width": 300,
|
||
"height": 100
|
||
}
|
||
},
|
||
"image": {
|
||
"@type": "ImageObject",
|
||
"url": "<?php echo htmlspecialchars($og_image); ?>",
|
||
"width": 1200,
|
||
"height": 630
|
||
},
|
||
"mainEntityOfPage": {
|
||
"@type": "WebPage",
|
||
"@id": "<?php echo htmlspecialchars($canonical_url); ?>"
|
||
},
|
||
"articleSection": "Business Intelligence",
|
||
"keywords": "<?php echo htmlspecialchars($article_keywords); ?>",
|
||
"wordCount": 4200,
|
||
"timeRequired": "PT<?php echo $read_time; ?>M",
|
||
"inLanguage": "en-GB"
|
||
}
|
||
</script>
|
||
</head>
|
||
<body>
|
||
<!-- Skip to content link for accessibility -->
|
||
<a href="#main-content" class="skip-to-content">Skip to main content</a>
|
||
|
||
<nav class="navbar scrolled" id="navbar">
|
||
<div class="nav-container">
|
||
<div class="nav-logo">
|
||
<a href="/">
|
||
<img src="../../assets/images/ukds-main-logo.png" alt="UK Data Services" class="logo" loading="eager">
|
||
</a>
|
||
</div>
|
||
<div class="nav-menu" id="nav-menu">
|
||
<a href="/" class="nav-link">Home</a>
|
||
<a href="/#services" class="nav-link">Capabilities</a>
|
||
<a href="/project-types" class="nav-link">Project Types</a>
|
||
<a href="/about" class="nav-link">About</a>
|
||
<a href="/blog/" class="nav-link active">Blog</a>
|
||
<a href="/#contact" class="nav-link">Contact</a>
|
||
<a href="/quote" class="nav-link cta-button">Request Consultation</a>
|
||
</div>
|
||
<div class="nav-toggle" id="nav-toggle">
|
||
<span class="bar"></span>
|
||
<span class="bar"></span>
|
||
<span class="bar"></span>
|
||
</div>
|
||
</div>
|
||
</nav>
|
||
|
||
<!-- Breadcrumb Navigation -->
|
||
<nav class="breadcrumb" aria-label="Breadcrumb">
|
||
<ol>
|
||
<li><a href="/">Home</a></li>
|
||
<li><a href="/blog/">Blog</a></li>
|
||
<li><a href="../categories/business-intelligence.php">Business Intelligence</a></li>
|
||
<li aria-current="page"><span>Advanced SQL Analytics</span></li>
|
||
</ol>
|
||
</nav>
|
||
|
||
<!-- Article Content -->
|
||
<main id="main-content">
|
||
<article class="blog-article">
|
||
<div class="container">
|
||
<!-- Article Header -->
|
||
<header class="article-header">
|
||
<div class="article-meta">
|
||
<a href="../categories/business-intelligence.php" class="category-link">Business Intelligence</a>
|
||
<time datetime="<?php echo $article_published; ?>" class="publish-date">8 June 2025</time>
|
||
<span class="read-time"><?php echo $read_time; ?> min read</span>
|
||
</div>
|
||
|
||
<h1 class="article-title"><?php echo htmlspecialchars($article_title); ?></h1>
|
||
|
||
<p class="article-subtitle"><?php echo htmlspecialchars($article_description); ?></p>
|
||
|
||
<div class="article-author">
|
||
<div class="author-info">
|
||
<strong>By <?php echo htmlspecialchars($article_author); ?></strong>
|
||
<p>SQL analytics and database optimization specialists</p>
|
||
</div>
|
||
<div class="article-share">
|
||
<a href="https://twitter.com/intent/tweet?text=<?php echo urlencode($article_title); ?>&url=<?php echo urlencode($canonical_url); ?>" target="_blank" rel="noopener" aria-label="Share on Twitter">📤 Share</a>
|
||
</div>
|
||
</div>
|
||
</header>
|
||
|
||
<!-- Table of Contents -->
|
||
<nav class="article-toc">
|
||
<h2>Table of Contents</h2>
|
||
<ol>
|
||
<li><a href="#window-functions">Advanced Window Functions</a></li>
|
||
<li><a href="#cte-recursive">CTEs and Recursive Queries</a></li>
|
||
<li><a href="#advanced-joins">Complex Joins and Set Operations</a></li>
|
||
<li><a href="#analytical-functions">Analytical and Statistical Functions</a></li>
|
||
<li><a href="#time-series-analysis">Time Series Analysis in SQL</a></li>
|
||
<li><a href="#performance-optimization">Query Optimization Strategies</a></li>
|
||
<li><a href="#data-quality">Data Quality and Validation</a></li>
|
||
<li><a href="#practical-examples">Real-World Business Cases</a></li>
|
||
</ol>
|
||
</nav>
|
||
|
||
<!-- Article Content -->
|
||
<div class="article-content">
|
||
<section id="window-functions">
|
||
<h2>Advanced Window Functions</h2>
|
||
<p>Window functions are among the most powerful SQL features for analytics, enabling complex calculations across row sets without grouping restrictions. These functions provide elegant solutions for ranking, moving averages, percentiles, and comparative analysis essential for business intelligence.</p>
|
||
|
||
<h3>Ranking and Row Number Functions</h3>
|
||
<p>Ranking functions help identify top performers, outliers, and relative positioning within datasets:</p>
|
||
|
||
<div class="code-example">
|
||
<h4>Customer Revenue Ranking Example</h4>
|
||
<pre><code>-- Calculate customer revenue rankings with ties handling
|
||
SELECT
|
||
customer_id,
|
||
customer_name,
|
||
total_revenue,
|
||
ROW_NUMBER() OVER (ORDER BY total_revenue DESC) as row_num,
|
||
RANK() OVER (ORDER BY total_revenue DESC) as rank_with_gaps,
|
||
DENSE_RANK() OVER (ORDER BY total_revenue DESC) as dense_rank,
|
||
NTILE(4) OVER (ORDER BY total_revenue DESC) as quartile,
|
||
PERCENT_RANK() OVER (ORDER BY total_revenue) as percentile_rank
|
||
FROM customer_revenue_summary
|
||
WHERE date_year = 2024;</code></pre>
|
||
</div>
|
||
|
||
<div class="technique-explanation">
|
||
<h4>Advanced Ranking Techniques</h4>
|
||
|
||
<div class="technique">
|
||
<h5>Conditional Ranking</h5>
|
||
<pre><code>-- Rank customers within regions, with revenue threshold filtering
|
||
SELECT
|
||
customer_id,
|
||
region,
|
||
total_revenue,
|
||
    CASE
        WHEN total_revenue >= 100000 THEN
            RANK() OVER (PARTITION BY region ORDER BY total_revenue DESC)
        ELSE NULL
    END as regional_rank_high_value
FROM customer_revenue_summary
WHERE date_year = 2024;</code></pre>
</div>
</div>

<h3>Moving Averages and Value Comparison Functions</h3>
<p>Frame-based aggregates and offset functions such as LAG, LEAD, FIRST_VALUE and LAST_VALUE let you compare each row against its neighbours in an ordered series:</p>

<div class="code-example">
<h4>Seven-Day Moving Average with Offset Functions</h4>
<pre><code>SELECT
|
||
customer_id,
|
||
transaction_date,
|
||
daily_revenue,
|
||
AVG(daily_revenue) OVER (
|
||
ORDER BY transaction_date
|
||
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
|
||
) as seven_day_avg,
|
||
|
||
LAG(daily_revenue, 1) OVER (ORDER BY transaction_date) as prev_day,
|
||
LEAD(daily_revenue, 1) OVER (ORDER BY transaction_date) as next_day,
|
||
|
||
FIRST_VALUE(daily_revenue) OVER (
|
||
ORDER BY transaction_date
|
||
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
||
) as first_revenue,
|
||
|
||
LAST_VALUE(daily_revenue) OVER (
|
||
ORDER BY transaction_date
|
||
ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
|
||
) as last_revenue
|
||
FROM daily_customer_revenue
|
||
WHERE customer_id = 12345
|
||
ORDER BY transaction_date;</code></pre>
|
||
</div>
|
||
|
||
<h3>Advanced Frame Specifications</h3>
|
||
<p>Master different frame types for precise analytical calculations:</p>
|
||
|
||
<div class="frame-types">
|
||
<div class="frame-type">
|
||
<h4>ROWS vs RANGE Frame Types</h4>
|
||
<pre><code>-- ROWS: Physical row-based frame (faster, more predictable)
|
||
SELECT
|
||
order_date,
|
||
daily_sales,
|
||
SUM(daily_sales) OVER (
|
||
ORDER BY order_date
|
||
ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING
|
||
) as five_day_sum_rows,
|
||
|
||
-- RANGE: Logical value-based frame (handles ties)
|
||
SUM(daily_sales) OVER (
|
||
ORDER BY order_date
|
||
RANGE BETWEEN INTERVAL '2' DAY PRECEDING
|
||
AND INTERVAL '2' DAY FOLLOWING
|
||
) as five_day_sum_range
|
||
FROM daily_sales_summary;</code></pre>
|
||
</div>
|
||
|
||
<div class="frame-type">
|
||
<h4>Dynamic Frame Boundaries</h4>
|
||
<pre><code>-- Month-to-date and year-to-date calculations
|
||
SELECT
|
||
order_date,
|
||
daily_sales,
|
||
SUM(daily_sales) OVER (
|
||
PARTITION BY EXTRACT(YEAR FROM order_date),
|
||
EXTRACT(MONTH FROM order_date)
|
||
ORDER BY order_date
|
||
ROWS UNBOUNDED PRECEDING
|
||
) as month_to_date,
|
||
|
||
SUM(daily_sales) OVER (
|
||
PARTITION BY EXTRACT(YEAR FROM order_date)
|
||
ORDER BY order_date
|
||
ROWS UNBOUNDED PRECEDING
|
||
) as year_to_date
|
||
FROM daily_sales_summary;</code></pre>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<section id="cte-recursive">
|
||
<h2>CTEs and Recursive Queries</h2>
|
||
<p>Common Table Expressions (CTEs) provide readable, maintainable approaches to complex queries. Recursive CTEs enable hierarchical data processing essential for organizational structures, product categories, and network analysis.</p>
|
||
|
||
<h3>Basic CTE Patterns</h3>
|
||
<p>Structure complex queries with multiple CTEs for clarity and reusability:</p>
|
||
|
||
<div class="cte-example">
|
||
<h4>Multi-CTE Customer Analysis</h4>
|
||
<pre><code>-- Complex customer segmentation using multiple CTEs
|
||
WITH customer_metrics AS (
|
||
SELECT
|
||
customer_id,
|
||
COUNT(DISTINCT order_id) as order_count,
|
||
SUM(order_total) as total_revenue,
|
||
AVG(order_total) as avg_order_value,
|
||
MAX(order_date) as last_order_date,
|
||
MIN(order_date) as first_order_date
|
||
FROM orders
|
||
WHERE order_date >= '2024-01-01'
|
||
GROUP BY customer_id
|
||
),
|
||
|
||
recency_scoring AS (
|
||
SELECT
|
||
customer_id,
|
||
CASE
|
||
WHEN DATEDIFF(day, last_order_date, GETDATE()) <= 30 THEN 5
|
||
WHEN DATEDIFF(day, last_order_date, GETDATE()) <= 90 THEN 4
|
||
WHEN DATEDIFF(day, last_order_date, GETDATE()) <= 180 THEN 3
|
||
WHEN DATEDIFF(day, last_order_date, GETDATE()) <= 365 THEN 2
|
||
ELSE 1
|
||
END as recency_score
|
||
FROM customer_metrics
|
||
),
|
||
|
||
frequency_scoring AS (
|
||
SELECT
|
||
customer_id,
|
||
NTILE(5) OVER (ORDER BY order_count) as frequency_score
|
||
FROM customer_metrics
|
||
),
|
||
|
||
monetary_scoring AS (
|
||
SELECT
|
||
customer_id,
|
||
NTILE(5) OVER (ORDER BY total_revenue) as monetary_score
|
||
FROM customer_metrics
|
||
)
|
||
|
||
SELECT
|
||
cm.customer_id,
|
||
cm.total_revenue,
|
||
cm.order_count,
|
||
cm.avg_order_value,
|
||
rs.recency_score,
|
||
fs.frequency_score,
|
||
ms.monetary_score,
|
||
(rs.recency_score + fs.frequency_score + ms.monetary_score) as rfm_score,
|
||
CASE
|
||
WHEN (rs.recency_score + fs.frequency_score + ms.monetary_score) >= 13 THEN 'Champions'
|
||
WHEN (rs.recency_score + fs.frequency_score + ms.monetary_score) >= 10 THEN 'Loyal Customers'
|
||
WHEN (rs.recency_score + fs.frequency_score + ms.monetary_score) >= 7 THEN 'Potential Loyalists'
|
||
WHEN (rs.recency_score + fs.frequency_score + ms.monetary_score) >= 5 THEN 'At Risk'
|
||
ELSE 'Lost Customers'
|
||
END as customer_segment
|
||
FROM customer_metrics cm
|
||
JOIN recency_scoring rs ON cm.customer_id = rs.customer_id
|
||
JOIN frequency_scoring fs ON cm.customer_id = fs.customer_id
|
||
JOIN monetary_scoring ms ON cm.customer_id = ms.customer_id;</code></pre>
|
||
</div>
|
||
|
||
<h3>Recursive CTEs for Hierarchical Data</h3>
|
||
<p>Handle organizational structures, category trees, and network analysis with recursive queries:</p>
|
||
|
||
<div class="recursive-examples">
|
||
<div class="recursive-example">
|
||
<h4>Organizational Hierarchy Analysis</h4>
|
||
<pre><code>-- Calculate organization levels and reporting chains
|
||
WITH RECURSIVE org_hierarchy AS (
|
||
-- Anchor: Top-level executives
|
||
SELECT
|
||
employee_id,
|
||
employee_name,
|
||
manager_id,
|
||
salary,
|
||
1 as level,
|
||
CAST(employee_name as VARCHAR(1000)) as hierarchy_path,
|
||
employee_id as top_manager_id
|
||
FROM employees
|
||
WHERE manager_id IS NULL
|
||
|
||
UNION ALL
|
||
|
||
-- Recursive: Add direct reports
|
||
SELECT
|
||
e.employee_id,
|
||
e.employee_name,
|
||
e.manager_id,
|
||
e.salary,
|
||
oh.level + 1,
|
||
oh.hierarchy_path + ' -> ' + e.employee_name,
|
||
oh.top_manager_id
|
||
FROM employees e
|
||
INNER JOIN org_hierarchy oh ON e.manager_id = oh.employee_id
|
||
WHERE oh.level < 10 -- Prevent infinite recursion
|
||
)
|
||
|
||
SELECT
|
||
employee_id,
|
||
employee_name,
|
||
level,
|
||
hierarchy_path,
|
||
salary,
|
||
AVG(salary) OVER (PARTITION BY level) as avg_salary_at_level,
|
||
COUNT(*) OVER (PARTITION BY top_manager_id) as org_size
|
||
FROM org_hierarchy
|
||
ORDER BY top_manager_id, level, employee_name;</code></pre>
|
||
</div>
|
||
|
||
<div class="recursive-example">
|
||
<h4>Product Category Tree with Aggregations</h4>
|
||
<pre><code>-- Recursive category analysis with sales rollups
|
||
WITH RECURSIVE category_tree AS (
|
||
-- Anchor: Root categories
|
||
SELECT
|
||
category_id,
|
||
category_name,
|
||
parent_category_id,
|
||
1 as level,
|
||
CAST(category_id as VARCHAR(1000)) as path
|
||
FROM product_categories
|
||
WHERE parent_category_id IS NULL
|
||
|
||
UNION ALL
|
||
|
||
-- Recursive: Child categories
|
||
SELECT
|
||
pc.category_id,
|
||
pc.category_name,
|
||
pc.parent_category_id,
|
||
ct.level + 1,
|
||
ct.path + '/' + CAST(pc.category_id as VARCHAR)
|
||
FROM product_categories pc
|
||
INNER JOIN category_tree ct ON pc.parent_category_id = ct.category_id
|
||
),
|
||
|
||
category_sales AS (
|
||
SELECT
|
||
ct.category_id,
|
||
ct.category_name,
|
||
ct.level,
|
||
ct.path,
|
||
COALESCE(SUM(s.sales_amount), 0) as direct_sales,
|
||
COUNT(DISTINCT s.product_id) as product_count
|
||
FROM category_tree ct
|
||
LEFT JOIN products p ON ct.category_id = p.category_id
LEFT JOIN sales s ON p.product_id = s.product_id
    AND s.sale_date >= '2024-01-01'  -- filter inside the JOIN condition:
                                     -- a WHERE clause on s.sale_date would turn the
                                     -- LEFT JOIN into an inner join and drop
                                     -- categories with no 2024 sales
GROUP BY ct.category_id, ct.category_name, ct.level, ct.path
|
||
)
|
||
|
||
SELECT
|
||
category_id,
|
||
category_name,
|
||
level,
|
||
REPLICATE(' ', level - 1) + category_name as indented_name,
|
||
direct_sales,
|
||
product_count,
|
||
-- Calculate total sales including subcategories
|
||
(SELECT SUM(cs2.direct_sales)
|
||
FROM category_sales cs2
|
||
WHERE cs2.path LIKE cs1.path + '%') as total_sales_with_children
|
||
FROM category_sales cs1
|
||
ORDER BY path;</code></pre>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<section id="advanced-joins">
|
||
<h2>Complex Joins and Set Operations</h2>
|
||
<p>Advanced join techniques and set operations enable sophisticated data analysis patterns essential for comprehensive business intelligence queries.</p>
|
||
|
||
<h3>Advanced Join Patterns</h3>
|
||
<p>Go beyond basic joins to handle complex analytical requirements:</p>
|
||
|
||
<div class="join-patterns">
|
||
<div class="join-pattern">
|
||
<h4>Self-Joins for Comparative Analysis</h4>
|
||
<pre><code>-- Compare customer performance year-over-year
|
||
SELECT
|
||
current_year.customer_id,
|
||
current_year.customer_name,
|
||
current_year.total_revenue as revenue_2024,
|
||
previous_year.total_revenue as revenue_2023,
|
||
(current_year.total_revenue - COALESCE(previous_year.total_revenue, 0)) as revenue_change,
|
||
CASE
|
||
WHEN previous_year.total_revenue > 0 THEN
|
||
((current_year.total_revenue - previous_year.total_revenue)
|
||
/ previous_year.total_revenue) * 100
|
||
ELSE NULL
|
||
END as growth_percentage
|
||
FROM (
|
||
SELECT customer_id, customer_name, SUM(order_total) as total_revenue
|
||
FROM orders o
|
||
JOIN customers c ON o.customer_id = c.customer_id
|
||
WHERE YEAR(order_date) = 2024
|
||
GROUP BY customer_id, customer_name
|
||
) current_year
|
||
LEFT JOIN (
|
||
SELECT customer_id, SUM(order_total) as total_revenue
|
||
FROM orders
|
||
WHERE YEAR(order_date) = 2023
|
||
GROUP BY customer_id
|
||
) previous_year ON current_year.customer_id = previous_year.customer_id;</code></pre>
|
||
</div>
|
||
|
||
<div class="join-pattern">
|
||
<h4>Lateral Joins for Correlated Subqueries</h4>
|
||
<pre><code>-- Get top 3 products for each customer with lateral join
|
||
SELECT
|
||
c.customer_id,
|
||
c.customer_name,
|
||
tp.product_id,
|
||
tp.product_name,
|
||
tp.total_purchased,
|
||
tp.rank_in_customer
|
||
FROM customers c
|
||
CROSS JOIN LATERAL (
|
||
SELECT
|
||
p.product_id,
|
||
p.product_name,
|
||
SUM(oi.quantity) as total_purchased,
|
||
ROW_NUMBER() OVER (ORDER BY SUM(oi.quantity) DESC) as rank_in_customer
|
||
FROM orders o
|
||
JOIN order_items oi ON o.order_id = oi.order_id
|
||
JOIN products p ON oi.product_id = p.product_id
|
||
WHERE o.customer_id = c.customer_id
|
||
GROUP BY p.product_id, p.product_name
|
||
ORDER BY total_purchased DESC
|
||
LIMIT 3
|
||
) tp
|
||
WHERE c.customer_id IN (SELECT customer_id FROM high_value_customers);</code></pre>
|
||
</div>
|
||
</div>
|
||
|
||
<h3>Set Operations for Complex Analysis</h3>
|
||
<p>Combine result sets to identify patterns, gaps, and overlaps in business data:</p>
|
||
|
||
<div class="set-operations">
|
||
<div class="set-operation">
|
||
<h4>Customer Behavior Analysis with EXCEPT</h4>
|
||
<pre><code>-- Find customers who purchased in 2023 but not in 2024
|
||
WITH customers_2023 AS (
|
||
SELECT DISTINCT customer_id
|
||
FROM orders
|
||
WHERE YEAR(order_date) = 2023
|
||
),
|
||
customers_2024 AS (
|
||
SELECT DISTINCT customer_id
|
||
FROM orders
|
||
WHERE YEAR(order_date) = 2024
|
||
),
|
||
churned_customers AS (
|
||
SELECT customer_id FROM customers_2023
|
||
EXCEPT
|
||
SELECT customer_id FROM customers_2024
|
||
)
|
||
|
||
SELECT
|
||
cc.customer_id,
|
||
c.customer_name,
|
||
c.email,
|
||
last_order.last_order_date,
|
||
last_order.last_order_total,
|
||
lifetime_stats.total_orders,
|
||
lifetime_stats.lifetime_value
|
||
FROM churned_customers cc
|
||
JOIN customers c ON cc.customer_id = c.customer_id
|
||
JOIN (
|
||
SELECT
|
||
customer_id,
|
||
MAX(order_date) as last_order_date,
|
||
MAX(order_total) as last_order_total
|
||
FROM orders
|
||
WHERE customer_id IN (SELECT customer_id FROM churned_customers)
|
||
GROUP BY customer_id
|
||
) last_order ON cc.customer_id = last_order.customer_id
|
||
JOIN (
|
||
SELECT
|
||
customer_id,
|
||
COUNT(*) as total_orders,
|
||
SUM(order_total) as lifetime_value
|
||
FROM orders
|
||
WHERE customer_id IN (SELECT customer_id FROM churned_customers)
|
||
GROUP BY customer_id
|
||
) lifetime_stats ON cc.customer_id = lifetime_stats.customer_id;</code></pre>
|
||
</div>
|
||
|
||
<div class="set-operation">
|
||
<h4>Product Affinity Analysis with INTERSECT</h4>
|
||
<pre><code>-- Find products frequently bought together
|
||
WITH product_pairs AS (
|
||
SELECT
|
||
oi1.product_id as product_a,
|
||
oi2.product_id as product_b,
|
||
COUNT(DISTINCT oi1.order_id) as co_purchase_count
|
||
FROM order_items oi1
|
||
JOIN order_items oi2 ON oi1.order_id = oi2.order_id
|
||
WHERE oi1.product_id < oi2.product_id -- Avoid duplicates and self-pairs
|
||
GROUP BY oi1.product_id, oi2.product_id
|
||
HAVING COUNT(DISTINCT oi1.order_id) >= 5 -- Minimum co-purchases
|
||
),
|
||
|
||
product_stats AS (
|
||
SELECT
|
||
product_id,
|
||
COUNT(DISTINCT order_id) as individual_purchase_count
|
||
FROM order_items
|
||
GROUP BY product_id
|
||
)
|
||
|
||
SELECT
|
||
pp.product_a,
|
||
pa.product_name as product_a_name,
|
||
pp.product_b,
|
||
pb.product_name as product_b_name,
|
||
pp.co_purchase_count,
|
||
psa.individual_purchase_count as product_a_total,
|
||
psb.individual_purchase_count as product_b_total,
|
||
ROUND(
|
||
(pp.co_purchase_count * 1.0 / LEAST(psa.individual_purchase_count, psb.individual_purchase_count)) * 100,
|
||
2
|
||
) as affinity_percentage
|
||
FROM product_pairs pp
|
||
JOIN products pa ON pp.product_a = pa.product_id
|
||
JOIN products pb ON pp.product_b = pb.product_id
|
||
JOIN product_stats psa ON pp.product_a = psa.product_id
|
||
JOIN product_stats psb ON pp.product_b = psb.product_id
|
||
ORDER BY affinity_percentage DESC, co_purchase_count DESC;</code></pre>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<section id="analytical-functions">
|
||
<h2>Analytical and Statistical Functions</h2>
|
||
<p>Modern SQL provides extensive statistical and analytical functions for advanced business intelligence without requiring external tools.</p>
|
||
|
||
<h3>Statistical Aggregates</h3>
|
||
<p>Calculate comprehensive statistics for business metrics:</p>
|
||
|
||
<div class="statistical-functions">
|
||
<h4>Comprehensive Revenue Analysis</h4>
|
||
<pre><code>-- Advanced statistical analysis of revenue by region
|
||
SELECT
|
||
region,
|
||
COUNT(*) as customer_count,
|
||
|
||
-- Central tendency measures
|
||
AVG(annual_revenue) as mean_revenue,
|
||
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY annual_revenue) as median_revenue,
|
||
MODE() WITHIN GROUP (ORDER BY annual_revenue) as modal_revenue,
|
||
|
||
-- Variability measures
|
||
STDDEV(annual_revenue) as revenue_stddev,
|
||
VAR(annual_revenue) as revenue_variance,
|
||
(STDDEV(annual_revenue) / AVG(annual_revenue)) * 100 as coefficient_of_variation,
|
||
|
||
-- Distribution measures
|
||
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY annual_revenue) as q1,
|
||
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY annual_revenue) as q3,
|
||
PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY annual_revenue) as p90,
|
||
PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY annual_revenue) as p95,
|
||
|
||
-- Range measures
|
||
MIN(annual_revenue) as min_revenue,
|
||
MAX(annual_revenue) as max_revenue,
|
||
MAX(annual_revenue) - MIN(annual_revenue) as revenue_range,
|
||
|
||
-- Outlier detection (IQR method)
|
||
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY annual_revenue) -
|
||
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY annual_revenue) as iqr,
|
||
|
||
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY annual_revenue) -
|
||
1.5 * (PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY annual_revenue) -
|
||
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY annual_revenue)) as lower_outlier_threshold,
|
||
|
||
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY annual_revenue) +
|
||
1.5 * (PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY annual_revenue) -
|
||
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY annual_revenue)) as upper_outlier_threshold
|
||
|
||
FROM customer_revenue_summary
|
||
WHERE year = 2024
|
||
GROUP BY region
|
||
ORDER BY mean_revenue DESC;</code></pre>
|
||
</div>
|
||
|
||
<h3>Correlation and Regression Analysis</h3>
|
||
<p>Identify relationships between business metrics using SQL:</p>
|
||
|
||
<div class="correlation-analysis">
|
||
<h4>Marketing Spend vs Revenue Correlation</h4>
|
||
<pre><code>-- Calculate correlation between marketing spend and revenue
|
||
WITH monthly_metrics AS (
|
||
SELECT
|
||
DATE_TRUNC('month', metric_date) as month,
|
||
SUM(marketing_spend) as total_marketing_spend,
|
||
SUM(revenue) as total_revenue,
|
||
AVG(customer_satisfaction_score) as avg_satisfaction
|
||
FROM business_metrics
|
||
WHERE metric_date >= '2024-01-01'
|
||
GROUP BY DATE_TRUNC('month', metric_date)
|
||
),
|
||
|
||
correlation_prep AS (
|
||
SELECT
|
||
month,
|
||
total_marketing_spend,
|
||
total_revenue,
|
||
avg_satisfaction,
|
||
AVG(total_marketing_spend) OVER () as mean_marketing,
|
||
AVG(total_revenue) OVER () as mean_revenue,
|
||
AVG(avg_satisfaction) OVER () as mean_satisfaction,
|
||
COUNT(*) OVER () as n
|
||
FROM monthly_metrics
|
||
)
|
||
|
||
SELECT
|
||
-- Pearson correlation coefficient for marketing spend vs revenue
|
||
SUM((total_marketing_spend - mean_marketing) * (total_revenue - mean_revenue)) /
|
||
(SQRT(SUM(POWER(total_marketing_spend - mean_marketing, 2))) *
|
||
SQRT(SUM(POWER(total_revenue - mean_revenue, 2)))) as marketing_revenue_correlation,
|
||
|
||
-- Simple linear regression: revenue = a + b * marketing_spend
|
||
(n * SUM(total_marketing_spend * total_revenue) - SUM(total_marketing_spend) * SUM(total_revenue)) /
|
||
(n * SUM(POWER(total_marketing_spend, 2)) - POWER(SUM(total_marketing_spend), 2)) as regression_slope,
|
||
|
||
(SUM(total_revenue) -
|
||
((n * SUM(total_marketing_spend * total_revenue) - SUM(total_marketing_spend) * SUM(total_revenue)) /
|
||
(n * SUM(POWER(total_marketing_spend, 2)) - POWER(SUM(total_marketing_spend), 2))) * SUM(total_marketing_spend)) / n as regression_intercept,
|
||
|
||
-- R-squared calculation.
-- NOTE: most databases (PostgreSQL, SQL Server, MySQL) do not allow a SELECT
-- list expression to reference aliases defined in the same list, so
-- regression_slope and regression_intercept must be computed in a preceding
-- CTE (or the expressions repeated inline) before this step will run.
1 - (SUM(POWER(total_revenue - (regression_intercept + regression_slope * total_marketing_spend), 2)) /
     SUM(POWER(total_revenue - mean_revenue, 2))) as r_squared

FROM correlation_prep;</code></pre>
|
||
</div>
|
||
</section>
|
||
|
||
<section id="time-series-analysis">
|
||
<h2>Time Series Analysis in SQL</h2>
|
||
<p>Time series analysis capabilities in SQL enable trend analysis, seasonality detection, and forecasting essential for business planning.</p>
|
||
|
||
<h3>Trend Analysis and Decomposition</h3>
|
||
<p>Identify underlying trends and seasonal patterns in business data:</p>
|
||
|
||
<div class="time-series-example">
|
||
<h4>Sales Trend and Seasonality Analysis</h4>
|
||
<pre><code>-- Comprehensive time series decomposition
|
||
WITH daily_sales AS (
|
||
SELECT
|
||
sale_date,
|
||
SUM(sale_amount) as daily_revenue,
|
||
EXTRACT(DOW FROM sale_date) as day_of_week,
|
||
EXTRACT(MONTH FROM sale_date) as month,
|
||
EXTRACT(QUARTER FROM sale_date) as quarter
|
||
FROM sales
|
||
WHERE sale_date >= '2023-01-01' AND sale_date <= '2024-12-31'
|
||
GROUP BY sale_date
|
||
),
|
||
|
||
moving_averages AS (
|
||
SELECT
|
||
sale_date,
|
||
daily_revenue,
|
||
day_of_week,
|
||
month,
|
||
quarter,
|
||
|
||
-- Various moving averages for trend analysis
|
||
AVG(daily_revenue) OVER (
|
||
ORDER BY sale_date
|
||
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
|
||
) as ma_7_day,
|
||
|
||
AVG(daily_revenue) OVER (
|
||
ORDER BY sale_date
|
||
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
|
||
) as ma_30_day,
|
||
|
||
AVG(daily_revenue) OVER (
|
||
ORDER BY sale_date
|
||
ROWS BETWEEN 89 PRECEDING AND CURRENT ROW
|
||
) as ma_90_day,
|
||
|
||
-- Exponential moving average (approximate)
|
||
daily_revenue * 0.1 +
|
||
LAG(daily_revenue, 1, daily_revenue) OVER (ORDER BY sale_date) * 0.9 as ema_approx
|
||
FROM daily_sales
|
||
),
|
||
|
||
seasonal_decomposition AS (
|
||
SELECT
|
||
sale_date,
|
||
daily_revenue,
|
||
ma_30_day as trend,
|
||
daily_revenue - ma_30_day as detrended,
|
||
|
||
-- Calculate seasonal component by day of week
|
||
AVG(daily_revenue - ma_30_day) OVER (
|
||
PARTITION BY day_of_week
|
||
) as seasonal_dow,
|
||
|
||
-- Calculate seasonal component by month
|
||
AVG(daily_revenue - ma_30_day) OVER (
|
||
PARTITION BY month
|
||
) as seasonal_month,
|
||
|
||
-- Residual component
|
||
daily_revenue - ma_30_day -
|
||
AVG(daily_revenue - ma_30_day) OVER (PARTITION BY day_of_week) as residual
|
||
|
||
FROM moving_averages
|
||
WHERE ma_30_day IS NOT NULL
|
||
)
|
||
|
||
SELECT
|
||
sale_date,
|
||
daily_revenue,
|
||
trend,
|
||
seasonal_dow,
|
||
seasonal_month,
|
||
residual,
|
||
|
||
-- Reconstruct the time series
|
||
trend + seasonal_dow + residual as reconstructed_value,
|
||
|
||
-- Calculate percentage components
|
||
(seasonal_dow / daily_revenue) * 100 as seasonal_dow_pct,
|
||
(residual / daily_revenue) * 100 as residual_pct,
|
||
|
||
-- Trend direction indicators
|
||
CASE
|
||
WHEN trend > LAG(trend, 7) OVER (ORDER BY sale_date) THEN 'Increasing'
|
||
WHEN trend < LAG(trend, 7) OVER (ORDER BY sale_date) THEN 'Decreasing'
|
||
ELSE 'Stable'
|
||
END as trend_direction
|
||
|
||
FROM seasonal_decomposition
|
||
ORDER BY sale_date;</code></pre>
|
||
</div>
|
||
|
||
<h3>Advanced Time Series Functions</h3>
|
||
<p>Utilize specialized time series functions for sophisticated analysis:</p>
|
||
|
||
<div class="advanced-time-series">
|
||
<h4>Change Point Detection and Forecasting</h4>
|
||
<pre><code>-- Detect significant changes in business metrics
|
||
WITH metric_changes AS (
|
||
SELECT
|
||
metric_date,
|
||
revenue,
|
||
LAG(revenue, 1) OVER (ORDER BY metric_date) as prev_revenue,
|
||
LAG(revenue, 7) OVER (ORDER BY metric_date) as prev_week_revenue,
|
||
LAG(revenue, 30) OVER (ORDER BY metric_date) as prev_month_revenue,
|
||
|
||
-- Percentage changes
|
||
CASE
|
||
WHEN LAG(revenue, 1) OVER (ORDER BY metric_date) > 0 THEN
|
||
((revenue - LAG(revenue, 1) OVER (ORDER BY metric_date)) /
|
||
LAG(revenue, 1) OVER (ORDER BY metric_date)) * 100
|
||
END as daily_change_pct,
|
||
|
||
CASE
|
||
WHEN LAG(revenue, 7) OVER (ORDER BY metric_date) > 0 THEN
|
||
((revenue - LAG(revenue, 7) OVER (ORDER BY metric_date)) /
|
||
LAG(revenue, 7) OVER (ORDER BY metric_date)) * 100
|
||
END as weekly_change_pct,
|
||
|
||
-- Rolling statistics for change point detection
|
||
AVG(revenue) OVER (
|
||
ORDER BY metric_date
|
||
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
|
||
) as rolling_30_avg,
|
||
|
||
STDDEV(revenue) OVER (
|
||
ORDER BY metric_date
|
||
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
|
||
) as rolling_30_stddev
|
||
|
||
FROM daily_business_metrics
|
||
),
|
||
|
||
change_points AS (
|
||
SELECT
|
||
metric_date,
|
||
revenue,
|
||
daily_change_pct,
|
||
weekly_change_pct,
|
||
rolling_30_avg,
|
||
rolling_30_stddev,
|
||
|
||
-- Z-score for anomaly detection
|
||
CASE
|
||
WHEN rolling_30_stddev > 0 THEN
|
||
(revenue - rolling_30_avg) / rolling_30_stddev
|
||
END as z_score,
|
||
|
||
-- Flag significant changes
|
||
CASE
|
||
WHEN ABS(daily_change_pct) > 20 THEN 'Significant Daily Change'
|
||
WHEN ABS(weekly_change_pct) > 30 THEN 'Significant Weekly Change'
|
||
WHEN rolling_30_stddev > 0 AND ABS((revenue - rolling_30_avg) / rolling_30_stddev) > 2 THEN 'Statistical Anomaly'
|
||
ELSE 'Normal'
|
||
END as change_classification
|
||
|
||
FROM metric_changes
|
||
WHERE rolling_30_stddev IS NOT NULL
|
||
),
|
||
|
||
-- Simple linear trend for forecasting
|
||
trend_analysis AS (
|
||
SELECT
|
||
COUNT(*) as n,
|
||
SUM(EXTRACT(DAY FROM metric_date)) as sum_x,
|
||
SUM(revenue) as sum_y,
|
||
SUM(EXTRACT(DAY FROM metric_date) * revenue) as sum_xy,
|
||
SUM(POWER(EXTRACT(DAY FROM metric_date), 2)) as sum_x2,
|
||
|
||
-- Linear regression coefficients
|
||
    -- Note: a select-list alias (n) cannot be referenced in the same SELECT,
    -- so COUNT(*) is repeated inline in the regression formulas.
    (COUNT(*) * SUM(EXTRACT(DAY FROM metric_date) * revenue) -
     SUM(EXTRACT(DAY FROM metric_date)) * SUM(revenue)) /
    (COUNT(*) * SUM(POWER(EXTRACT(DAY FROM metric_date), 2)) -
     POWER(SUM(EXTRACT(DAY FROM metric_date)), 2)) as slope,

    (SUM(revenue) -
     ((COUNT(*) * SUM(EXTRACT(DAY FROM metric_date) * revenue) -
       SUM(EXTRACT(DAY FROM metric_date)) * SUM(revenue)) /
      (COUNT(*) * SUM(POWER(EXTRACT(DAY FROM metric_date), 2)) -
       POWER(SUM(EXTRACT(DAY FROM metric_date)), 2))) * SUM(EXTRACT(DAY FROM metric_date))) / COUNT(*) as intercept
|
||
|
||
FROM change_points
|
||
WHERE metric_date >= CURRENT_DATE - INTERVAL '90 days'
|
||
)
|
||
|
||
SELECT
|
||
cp.metric_date,
|
||
cp.revenue,
|
||
cp.change_classification,
|
||
cp.z_score,
|
||
|
||
-- Trend line
|
||
ta.intercept + ta.slope * EXTRACT(DAY FROM cp.metric_date) as trend_value,
|
||
|
||
-- Simple forecast (next 7 days)
|
||
ta.intercept + ta.slope * (EXTRACT(DAY FROM cp.metric_date) + 7) as forecast_7_days
|
||
|
||
FROM change_points cp
|
||
CROSS JOIN trend_analysis ta
|
||
ORDER BY cp.metric_date;</code></pre>
|
||
</div>
|
||
</section>
|
||
|
||
<section id="performance-optimization">
|
||
<h2>Query Optimization Strategies</h2>
|
||
<p>Advanced SQL analytics requires optimization techniques to handle large datasets efficiently while maintaining query readability and maintainability.</p>
|
||
|
||
<h3>Index Strategy for Analytics</h3>
|
||
<p>Design indexes specifically for analytical workloads:</p>
|
||
|
||
<div class="index-strategies">
|
||
<div class="index-strategy">
|
||
<h4>Composite Indexes for Window Functions</h4>
|
||
<pre><code>-- Optimize window function queries with proper indexing
|
||
-- Index design for partition by + order by patterns
|
||
|
||
-- For queries with PARTITION BY customer_id ORDER BY order_date
|
||
CREATE INDEX idx_orders_customer_date_analytics ON orders (
|
||
customer_id, -- Partition column first
|
||
order_date, -- Order by column second
|
||
order_total -- Include frequently selected columns
|
||
);
|
||
|
||
-- For time series analysis queries
|
||
CREATE INDEX idx_sales_date_analytics ON sales (
|
||
sale_date, -- Primary ordering column
|
||
product_category, -- Common partition column
|
||
region -- Secondary partition column
|
||
) INCLUDE (
|
||
sale_amount, -- Avoid key lookups
|
||
quantity,
|
||
customer_id
|
||
);
|
||
|
||
-- For ranking queries within categories
|
||
CREATE INDEX idx_products_category_ranking ON products (
|
||
category_id, -- Partition column
|
||
total_sales DESC -- Order by column with sort direction
|
||
) INCLUDE (
|
||
product_name,
|
||
price,
|
||
stock_level
|
||
);</code></pre>
|
||
</div>
|
||
|
||
<div class="index-strategy">
|
||
<h4>Filtered Indexes for Specific Analytics</h4>
|
||
<pre><code>-- Create filtered indexes for specific analytical scenarios
|
||
|
||
-- Index for active customers only
|
||
CREATE INDEX idx_orders_active_customers ON orders (
|
||
customer_id,
|
||
order_date DESC
|
||
)
|
||
WHERE order_date >= DATEADD(YEAR, -2, GETDATE())
|
||
INCLUDE (order_total, product_count);
|
||
|
||
-- Index for high-value transactions
|
||
CREATE INDEX idx_orders_high_value ON orders (
|
||
order_date,
|
||
customer_id
|
||
)
|
||
WHERE order_total >= 1000
|
||
INCLUDE (order_total, discount_amount);
|
||
|
||
-- Index for specific time periods (quarterly analysis)
|
||
CREATE INDEX idx_sales_current_quarter ON sales (
|
||
product_id,
|
||
sale_date
|
||
)
|
||
WHERE sale_date >= DATEADD(QUARTER, DATEDIFF(QUARTER, 0, GETDATE()), 0)
|
||
INCLUDE (sale_amount, quantity);</code></pre>
|
||
</div>
|
||
</div>
|
||
|
||
<h3>Query Optimization Techniques</h3>
|
||
<p>Apply specific optimization patterns for complex analytical queries:</p>
|
||
|
||
<div class="optimization-techniques">
|
||
<div class="technique">
|
||
<h4>Avoiding Redundant Window Function Calculations</h4>
|
||
<pre><code>-- INEFFICIENT: Multiple similar window function calls
|
||
SELECT
|
||
customer_id,
|
||
order_date,
|
||
order_total,
|
||
SUM(order_total) OVER (PARTITION BY customer_id ORDER BY order_date) as running_total,
|
||
AVG(order_total) OVER (PARTITION BY customer_id ORDER BY order_date) as running_avg,
|
||
COUNT(*) OVER (PARTITION BY customer_id ORDER BY order_date) as running_count,
|
||
MAX(order_total) OVER (PARTITION BY customer_id ORDER BY order_date) as running_max
|
||
FROM orders;
|
||
|
||
-- EFFICIENT: Calculate once, derive others
|
||
WITH base_calculations AS (
|
||
SELECT
|
||
customer_id,
|
||
order_date,
|
||
order_total,
|
||
SUM(order_total) OVER (PARTITION BY customer_id ORDER BY order_date) as running_total,
|
||
COUNT(*) OVER (PARTITION BY customer_id ORDER BY order_date) as running_count,
|
||
MAX(order_total) OVER (PARTITION BY customer_id ORDER BY order_date) as running_max
|
||
FROM orders
|
||
)
|
||
SELECT
|
||
customer_id,
|
||
order_date,
|
||
order_total,
|
||
running_total,
|
||
running_total / running_count as running_avg, -- Derive from existing calculations
|
||
running_count,
|
||
running_max
|
||
FROM base_calculations;</code></pre>
|
||
</div>
|
||
|
||
<div class="technique">
|
||
<h4>Optimizing Large Aggregations</h4>
|
||
<pre><code>-- Use materialized views for frequently accessed aggregations
|
||
CREATE MATERIALIZED VIEW mv_customer_monthly_stats AS
|
||
SELECT
|
||
customer_id,
|
||
DATE_TRUNC('month', order_date) as order_month,
|
||
COUNT(*) as order_count,
|
||
SUM(order_total) as total_revenue,
|
||
AVG(order_total) as avg_order_value,
|
||
MAX(order_date) as last_order_date
|
||
FROM orders
|
||
GROUP BY customer_id, DATE_TRUNC('month', order_date);
|
||
|
||
-- Create appropriate indexes on materialized view
|
||
CREATE INDEX idx_mv_customer_monthly_customer_month
|
||
ON mv_customer_monthly_stats (customer_id, order_month);
|
||
|
||
-- Use partitioning for very large fact tables
|
||
CREATE TABLE sales_partitioned (
|
||
sale_id BIGINT,
|
||
sale_date DATE,
|
||
customer_id INT,
|
||
product_id INT,
|
||
sale_amount DECIMAL(10,2),
|
||
region VARCHAR(50)
|
||
)
|
||
PARTITION BY RANGE (sale_date) (
|
||
PARTITION p2023 VALUES LESS THAN ('2024-01-01'),
|
||
PARTITION p2024_q1 VALUES LESS THAN ('2024-04-01'),
|
||
PARTITION p2024_q2 VALUES LESS THAN ('2024-07-01'),
|
||
PARTITION p2024_q3 VALUES LESS THAN ('2024-10-01'),
|
||
PARTITION p2024_q4 VALUES LESS THAN ('2025-01-01')
|
||
);</code></pre>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<section id="data-quality">
|
||
<h2>Data Quality and Validation</h2>
|
||
<p>Robust data quality checks ensure analytical results are reliable and trustworthy. Implement comprehensive validation within your SQL analytics workflows.</p>
|
||
|
||
<h3>Comprehensive Data Quality Framework</h3>
|
||
<p>Build systematic data quality checks into analytical processes:</p>
|
||
|
||
<div class="data-quality-example">
|
||
<h4>Multi-Dimensional Data Quality Assessment</h4>
|
||
<pre><code>-- Comprehensive data quality assessment query
|
||
WITH data_quality_metrics AS (
|
||
SELECT
|
||
'orders' as table_name,
|
||
COUNT(*) as total_records,
|
||
|
||
-- Completeness checks
|
||
COUNT(*) - COUNT(customer_id) as missing_customer_id,
|
||
COUNT(*) - COUNT(order_date) as missing_order_date,
|
||
COUNT(*) - COUNT(order_total) as missing_order_total,
|
||
|
||
-- Validity checks
|
||
SUM(CASE WHEN order_total < 0 THEN 1 ELSE 0 END) as negative_amounts,
|
||
SUM(CASE WHEN order_date > CURRENT_DATE THEN 1 ELSE 0 END) as future_dates,
|
||
SUM(CASE WHEN order_date < '2020-01-01' THEN 1 ELSE 0 END) as very_old_dates,
|
||
|
||
-- Consistency checks
|
||
SUM(CASE WHEN order_total != (
|
||
SELECT SUM(oi.quantity * oi.unit_price)
|
||
FROM order_items oi
|
||
WHERE oi.order_id = o.order_id
|
||
) THEN 1 ELSE 0 END) as inconsistent_totals,
|
||
|
||
-- Uniqueness checks
|
||
COUNT(*) - COUNT(DISTINCT order_id) as duplicate_order_ids,
|
||
|
||
-- Range checks
|
||
SUM(CASE WHEN order_total > 10000 THEN 1 ELSE 0 END) as potentially_high_amounts,
|
||
|
||
-- Statistical outliers (using IQR method)
|
||
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY order_total) as q3,
|
||
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY order_total) as q1,
|
||
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY order_total) -
|
||
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY order_total) as iqr
|
||
|
||
FROM orders o
|
||
WHERE order_date >= '2024-01-01'
|
||
),
|
||
|
||
quality_summary AS (
|
||
SELECT
|
||
table_name,
|
||
total_records,
|
||
|
||
-- Calculate quality percentages
|
||
ROUND((1.0 - (missing_customer_id * 1.0 / total_records)) * 100, 2) as customer_id_completeness,
|
||
ROUND((1.0 - (missing_order_date * 1.0 / total_records)) * 100, 2) as order_date_completeness,
|
||
ROUND((1.0 - (missing_order_total * 1.0 / total_records)) * 100, 2) as order_total_completeness,
|
||
|
||
ROUND((1.0 - (negative_amounts * 1.0 / total_records)) * 100, 2) as amount_validity,
|
||
ROUND((1.0 - (future_dates * 1.0 / total_records)) * 100, 2) as date_validity,
|
||
ROUND((1.0 - (inconsistent_totals * 1.0 / total_records)) * 100, 2) as total_consistency,
|
||
ROUND((1.0 - (duplicate_order_ids * 1.0 / total_records)) * 100, 2) as id_uniqueness,
|
||
|
||
-- Outlier detection
|
||
q1 - 1.5 * iqr as lower_outlier_threshold,
|
||
q3 + 1.5 * iqr as upper_outlier_threshold,
|
||
|
||
-- Overall quality score (weighted average)
|
||
ROUND((
|
||
(1.0 - (missing_customer_id * 1.0 / total_records)) * 0.2 +
|
||
(1.0 - (missing_order_date * 1.0 / total_records)) * 0.2 +
|
||
(1.0 - (missing_order_total * 1.0 / total_records)) * 0.2 +
|
||
(1.0 - (negative_amounts * 1.0 / total_records)) * 0.15 +
|
||
(1.0 - (future_dates * 1.0 / total_records)) * 0.1 +
|
||
(1.0 - (inconsistent_totals * 1.0 / total_records)) * 0.1 +
|
||
(1.0 - (duplicate_order_ids * 1.0 / total_records)) * 0.05
|
||
) * 100, 2) as overall_quality_score
|
||
|
||
FROM data_quality_metrics
|
||
)
|
||
|
||
SELECT
|
||
table_name,
|
||
total_records,
|
||
customer_id_completeness || '%' as customer_id_completeness,
|
||
order_date_completeness || '%' as order_date_completeness,
|
||
order_total_completeness || '%' as order_total_completeness,
|
||
amount_validity || '%' as amount_validity,
|
||
date_validity || '%' as date_validity,
|
||
total_consistency || '%' as total_consistency,
|
||
id_uniqueness || '%' as id_uniqueness,
|
||
overall_quality_score || '%' as overall_quality_score,
|
||
|
||
CASE
|
||
WHEN overall_quality_score >= 95 THEN 'Excellent'
|
||
WHEN overall_quality_score >= 90 THEN 'Good'
|
||
WHEN overall_quality_score >= 80 THEN 'Acceptable'
|
||
WHEN overall_quality_score >= 70 THEN 'Poor'
|
||
ELSE 'Critical'
|
||
END as quality_rating
|
||
|
||
FROM quality_summary;</code></pre>
|
||
</div>
|
||
|
||
<h3>Automated Data Quality Monitoring</h3>
|
||
<p>Implement ongoing data quality monitoring with automated alerts:</p>
|
||
|
||
<div class="monitoring-example">
|
||
<h4>Daily Data Quality Dashboard</h4>
|
||
<pre><code>-- Create automated data quality monitoring
|
||
CREATE OR REPLACE VIEW daily_data_quality_dashboard AS
|
||
WITH daily_metrics AS (
|
||
SELECT
|
||
CURRENT_DATE as check_date,
|
||
'daily_sales' as table_name,
|
||
|
||
-- Volume checks
|
||
COUNT(*) as record_count,
|
||
COUNT(*) - LAG(COUNT(*), 1) OVER (ORDER BY DATE(created_at)) as volume_change,
|
||
|
||
-- Completeness monitoring
|
||
COUNT(CASE WHEN sale_amount IS NULL THEN 1 END) as missing_amounts,
|
||
COUNT(CASE WHEN customer_id IS NULL THEN 1 END) as missing_customers,
|
||
|
||
-- Freshness checks
|
||
MAX(created_at) as latest_record,
|
||
    EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - MAX(created_at))) / 3600 as hours_since_latest,
|
||
|
||
-- Business rule validation
|
||
COUNT(CASE WHEN sale_amount <= 0 THEN 1 END) as invalid_amounts,
|
||
COUNT(CASE WHEN sale_date > CURRENT_DATE THEN 1 END) as future_sales,
|
||
|
||
-- Statistical monitoring
|
||
AVG(sale_amount) as avg_sale_amount,
|
||
STDDEV(sale_amount) as stddev_sale_amount
|
||
|
||
FROM sales
|
||
WHERE DATE(created_at) = CURRENT_DATE
|
||
GROUP BY DATE(created_at)
|
||
),
|
||
|
||
quality_alerts AS (
|
||
SELECT
|
||
*,
|
||
CASE
|
||
WHEN ABS(volume_change) > (record_count * 0.2) THEN 'Volume Alert: >20% change'
|
||
WHEN missing_amounts > (record_count * 0.05) THEN 'Completeness Alert: >5% missing amounts'
|
||
WHEN hours_since_latest > 2 THEN 'Freshness Alert: Data older than 2 hours'
|
||
WHEN invalid_amounts > 0 THEN 'Validity Alert: Invalid amounts detected'
|
||
WHEN future_sales > 0 THEN 'Logic Alert: Future sales detected'
|
||
ELSE 'No alerts'
|
||
END as alert_status,
|
||
|
||
CASE
|
||
WHEN hours_since_latest > 4 OR invalid_amounts > (record_count * 0.1) THEN 'Critical'
|
||
WHEN ABS(volume_change) > (record_count * 0.2) OR missing_amounts > (record_count * 0.05) THEN 'Warning'
|
||
ELSE 'Normal'
|
||
END as severity_level
|
||
|
||
FROM daily_metrics
|
||
)
|
||
|
||
SELECT
|
||
check_date,
|
||
table_name,
|
||
record_count,
|
||
volume_change,
|
||
ROUND((1.0 - missing_amounts * 1.0 / record_count) * 100, 2) as amount_completeness_pct,
|
||
hours_since_latest,
|
||
invalid_amounts,
|
||
alert_status,
|
||
severity_level,
|
||
|
||
-- Quality score calculation
|
||
CASE
|
||
WHEN severity_level = 'Critical' THEN 0
|
||
WHEN severity_level = 'Warning' THEN 70
|
||
ELSE 100
|
||
END as daily_quality_score
|
||
|
||
FROM quality_alerts;</code></pre>
|
||
</div>
|
||
</section>
|
||
|
||
<section id="practical-examples">
|
||
<h2>Real-World Business Cases</h2>
|
||
<p>Apply advanced SQL techniques to solve complex business problems across different industries and use cases.</p>
|
||
|
||
<h3>Customer Lifetime Value Analysis</h3>
|
||
<p>Calculate sophisticated CLV metrics using advanced SQL patterns:</p>
|
||
|
||
<div class="clv-example">
|
||
<h4>Predictive Customer Lifetime Value</h4>
|
||
<pre><code>-- Advanced CLV calculation with cohort analysis and predictive elements
|
||
WITH customer_cohorts AS (
|
||
SELECT
|
||
customer_id,
|
||
MIN(order_date) as first_order_date,
|
||
DATE_TRUNC('month', MIN(order_date)) as cohort_month
|
||
FROM orders
|
||
GROUP BY customer_id
|
||
),
|
||
|
||
monthly_customer_activity AS (
|
||
SELECT
|
||
c.customer_id,
|
||
c.cohort_month,
|
||
DATE_TRUNC('month', o.order_date) as activity_month,
|
||
EXTRACT(EPOCH FROM (DATE_TRUNC('month', o.order_date) - c.cohort_month)) /
|
||
EXTRACT(EPOCH FROM INTERVAL '1 month') as period_number,
|
||
COUNT(DISTINCT o.order_id) as orders_count,
|
||
SUM(o.order_total) as revenue,
|
||
AVG(o.order_total) as avg_order_value
|
||
FROM customer_cohorts c
|
||
JOIN orders o ON c.customer_id = o.customer_id
|
||
GROUP BY c.customer_id, c.cohort_month, DATE_TRUNC('month', o.order_date)
|
||
),
|
||
|
||
retention_rates AS (
|
||
SELECT
|
||
cohort_month,
|
||
period_number,
|
||
COUNT(DISTINCT customer_id) as customers_active,
|
||
FIRST_VALUE(COUNT(DISTINCT customer_id)) OVER (
|
||
PARTITION BY cohort_month
|
||
ORDER BY period_number
|
||
) as cohort_size,
|
||
COUNT(DISTINCT customer_id) * 1.0 /
|
||
FIRST_VALUE(COUNT(DISTINCT customer_id)) OVER (
|
||
PARTITION BY cohort_month
|
||
ORDER BY period_number
|
||
) as retention_rate
|
||
FROM monthly_customer_activity
|
||
GROUP BY cohort_month, period_number
|
||
),
|
||
|
||
customer_metrics AS (
|
||
SELECT
|
||
c.customer_id,
|
||
c.cohort_month,
|
||
COUNT(DISTINCT mca.activity_month) as active_months,
|
||
SUM(mca.revenue) as total_revenue,
|
||
AVG(mca.revenue) as avg_monthly_revenue,
|
||
MAX(mca.activity_month) as last_active_month,
|
||
|
||
-- Calculate customer age in months
|
||
EXTRACT(EPOCH FROM (COALESCE(MAX(mca.activity_month), CURRENT_DATE) - c.cohort_month)) /
|
||
EXTRACT(EPOCH FROM INTERVAL '1 month') as customer_age_months,
|
||
|
||
-- Historical CLV (actual)
|
||
SUM(mca.revenue) as historical_clv,
|
||
|
||
-- Frequency and monetary components
|
||
COUNT(DISTINCT mca.activity_month) * 1.0 /
|
||
NULLIF(EXTRACT(EPOCH FROM (MAX(mca.activity_month) - c.cohort_month)) /
|
||
EXTRACT(EPOCH FROM INTERVAL '1 month'), 0) as purchase_frequency,
|
||
|
||
SUM(mca.revenue) / NULLIF(COUNT(DISTINCT mca.activity_month), 0) as avg_revenue_per_active_month
|
||
|
||
FROM customer_cohorts c
|
||
LEFT JOIN monthly_customer_activity mca ON c.customer_id = mca.customer_id
|
||
GROUP BY c.customer_id, c.cohort_month
|
||
),
|
||
|
||
predictive_clv AS (
|
||
SELECT
|
||
cm.*,
|
||
|
||
-- Get cohort-level retention curve
|
||
COALESCE(AVG(rr.retention_rate) OVER (
|
||
PARTITION BY cm.cohort_month
|
||
), 0.1) as avg_cohort_retention,
|
||
|
||
-- Predictive CLV calculation
|
||
-- Formula: (Average Monthly Revenue × Purchase Frequency × Gross Margin) / (1 + Discount Rate - Retention Rate)
|
||
    -- Note: the avg_cohort_retention alias cannot be referenced in the same
    -- SELECT, so the window expression is repeated inline here.
    CASE
        WHEN COALESCE(AVG(rr.retention_rate) OVER (PARTITION BY cm.cohort_month), 0.1) < 1 THEN
            (COALESCE(avg_revenue_per_active_month, 0) *
             COALESCE(purchase_frequency, 0) *
             0.3) / -- Assuming 30% gross margin
            (1 + 0.01 - COALESCE(AVG(rr.retention_rate) OVER (PARTITION BY cm.cohort_month), 0.1)) -- 1% monthly discount rate
        ELSE historical_clv
    END as predicted_clv,
|
||
|
||
-- Risk segmentation
|
||
CASE
|
||
WHEN EXTRACT(EPOCH FROM (CURRENT_DATE - last_active_month)) /
|
||
EXTRACT(EPOCH FROM INTERVAL '1 month') > 6 THEN 'High Risk'
|
||
WHEN EXTRACT(EPOCH FROM (CURRENT_DATE - last_active_month)) /
|
||
EXTRACT(EPOCH FROM INTERVAL '1 month') > 3 THEN 'Medium Risk'
|
||
WHEN last_active_month >= CURRENT_DATE - INTERVAL '1 month' THEN 'Active'
|
||
ELSE 'Inactive'
|
||
END as customer_status,
|
||
|
||
-- Value tier classification
|
||
NTILE(5) OVER (ORDER BY historical_clv) as value_quintile
|
||
|
||
FROM customer_metrics cm
|
||
LEFT JOIN retention_rates rr ON cm.cohort_month = rr.cohort_month
|
||
AND ROUND(cm.customer_age_months) = rr.period_number
|
||
)
|
||
|
||
SELECT
|
||
customer_id,
|
||
cohort_month,
|
||
customer_status,
|
||
value_quintile,
|
||
active_months,
|
||
customer_age_months,
|
||
ROUND(total_revenue, 2) as historical_clv,
|
||
ROUND(predicted_clv, 2) as predicted_clv,
|
||
ROUND(avg_revenue_per_active_month, 2) as avg_monthly_revenue,
|
||
ROUND(purchase_frequency, 3) as purchase_frequency,
|
||
ROUND(avg_cohort_retention, 3) as cohort_retention_rate,
|
||
|
||
-- Strategic recommendations
|
||
CASE
|
||
WHEN customer_status = 'Active' AND value_quintile >= 4 THEN 'VIP Program'
|
||
WHEN customer_status = 'Active' AND value_quintile = 3 THEN 'Loyalty Program'
|
||
WHEN customer_status = 'Medium Risk' AND value_quintile >= 3 THEN 'Retention Campaign'
|
||
WHEN customer_status = 'High Risk' AND value_quintile >= 3 THEN 'Win-Back Campaign'
|
||
WHEN customer_status = 'Inactive' THEN 'Re-engagement Required'
|
||
ELSE 'Standard Marketing'
|
||
END as recommended_action
|
||
|
||
FROM predictive_clv
|
||
WHERE predicted_clv > 0
|
||
ORDER BY predicted_clv DESC;</code></pre>
|
||
</div>
|
||
|
||
<div class="expert-consultation-cta">
|
||
<h3>Need Advanced SQL Analytics Support?</h3>
|
||
<p>Our database specialists can help you implement sophisticated SQL analytics solutions that scale with your business requirements.</p>
|
||
<a href="../../quote.php?service=sql-analytics" class="btn btn-primary">Get SQL Analytics Consultation</a>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
|
||
<!-- Related Articles -->
|
||
<section class="related-articles">
|
||
<h2>Related Articles</h2>
|
||
<div class="related-grid">
|
||
<article class="related-card">
|
||
<h3><a href="business-intelligence-dashboard-design.php">BI Dashboard Design Best Practices</a></h3>
|
||
<p>Transform your SQL analytics into compelling visual dashboards for business users.</p>
|
||
<span class="read-time">12 min read</span>
|
||
</article>
|
||
|
||
<article class="related-card">
|
||
<h3><a href="../categories/business-intelligence.php">More Business Intelligence Articles</a></h3>
|
||
<p>Explore our complete collection of business intelligence and data analytics resources.</p>
|
||
<span class="read-time">Browse category</span>
|
||
</article>
|
||
|
||
<article class="related-card">
|
||
<h3><a href="/case-studies/">SQL Analytics Success Stories</a></h3>
|
||
<p>See real-world examples of advanced SQL implementations across different industries.</p>
|
||
<span class="read-time">Multiple studies</span>
|
||
</article>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
</article>
|
||
|
||
<!-- CTA Section -->
|
||
<section class="cta">
|
||
<div class="container">
|
||
<div class="cta-content">
|
||
<h2>Need Expert SQL Analytics Services?</h2>
|
||
<p>Our data engineering team builds high-performance SQL solutions that unlock insights from your business data.</p>
|
||
<div class="cta-buttons">
|
||
<a href="/quote" class="btn btn-primary">Get Free Consultation</a>
|
||
<a href="/#services" class="btn btn-secondary">Explore Data Services</a>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
</main>
|
||
|
||
<!-- Footer -->
|
||
<footer class="footer">
|
||
<div class="container">
|
||
<div class="footer-content">
|
||
<div class="footer-section">
|
||
<div class="footer-logo">
|
||
<img src="../../assets/images/logo-white.svg" alt="UK Data Services" loading="lazy">
|
||
</div>
|
||
<p>Enterprise data intelligence solutions for modern British business. Transform your operations with accurate, actionable insights and regulatory-compliant data services.</p>
|
||
</div>
|
||
|
||
<div class="footer-section">
|
||
<h3>Analytics Services</h3>
|
||
<ul>
|
||
<li><a href="/#services">SQL Analytics</a></li>
|
||
<li><a href="/#services">Database Optimization</a></li>
|
||
<li><a href="/#services">Business Intelligence</a></li>
|
||
<li><a href="/#services">Data Engineering</a></li>
|
||
<li><a href="/#services">Performance Tuning</a></li>
|
||
</ul>
|
||
</div>
|
||
|
||
<div class="footer-section">
|
||
<h3>Resources</h3>
|
||
<ul>
|
||
<li><a href="/">SQL Analytics Blog</a></li>
|
||
<li><a href="/case-studies/">Case Studies</a></li>
|
||
<li><a href="/about">About UK Data Services</a></li>
|
||
<li><a href="/project-types">Project Types</a></li>
|
||
<li><a href="/faq">FAQ</a></li>
|
||
<li><a href="/quote">Request Consultation</a></li>
|
||
</ul>
|
||
</div>
|
||
|
||
<div class="footer-section">
|
||
<h3>Legal & Support</h3>
|
||
<ul>
|
||
<li><a href="/privacy-policy">Privacy Policy</a></li>
|
||
<li><a href="/terms-of-service">Terms of Service</a></li>
|
||
<li><a href="/cookie-policy">Cookie Policy</a></li>
|
||
<li><a href="/gdpr-compliance">GDPR Compliance</a></li>
|
||
<li><a href="/#contact">Contact & Support</a></li>
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
|
||
<div class="footer-bottom">
|
||
<p>© <?php echo date('Y'); ?> UK Data Services. All rights reserved.</p>
|
||
<div class="social-links">
|
||
<a href="https://www.linkedin.com/company/uk-data-services" aria-label="LinkedIn" rel="noopener" target="_blank">
|
||
<img src="../../assets/images/icon-linkedin.svg" alt="LinkedIn" loading="lazy">
|
||
</a>
|
||
<a href="https://twitter.com/ukdataservices" aria-label="Twitter" rel="noopener" target="_blank">
|
||
<img src="../../assets/images/icon-twitter.svg" alt="Twitter" loading="lazy">
|
||
</a>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</footer>
|
||
|
||
<!-- Scripts -->
|
||
<script src="../../assets/js/main.js"></script>
|
||
|
||
<!-- Article-specific functionality -->
|
||
<script>
|
||
document.addEventListener('DOMContentLoaded', function() {
    // --- Table-of-contents navigation -------------------------------------
    // Smooth-scroll TOC links to their target sections, offsetting for the
    // fixed page header so the heading is not hidden behind it.
    const tocLinks = document.querySelectorAll('.article-toc a');

    tocLinks.forEach(link => {
        link.addEventListener('click', function(e) {
            e.preventDefault();
            const targetSection = document.querySelector(this.getAttribute('href'));
            if (targetSection) {
                const headerOffset = 100; // px height of the fixed header
                const offsetPosition =
                    targetSection.getBoundingClientRect().top + window.pageYOffset - headerOffset;
                window.scrollTo({
                    top: offsetPosition,
                    behavior: 'smooth'
                });
            }
        });
    });

    // --- Reading progress indicator ---------------------------------------
    // A thin gradient bar fixed to the top of the viewport whose width tracks
    // how far the reader has scrolled through the article body.
    const article = document.querySelector('.article-content');
    if (!article) {
        // No article body on this page (e.g. template reuse): skip the
        // feature instead of throwing inside the scroll handler.
        return;
    }

    const progressBar = document.createElement('div');
    progressBar.className = 'reading-progress';
    progressBar.style.cssText = `
        position: fixed;
        top: 0;
        left: 0;
        width: 0%;
        height: 3px;
        background: linear-gradient(90deg, #179e83, #144784);
        z-index: 1000;
        transition: width 0.3s ease;
    `;
    document.body.appendChild(progressBar);

    function updateReadingProgress() {
        const articleRect = article.getBoundingClientRect();
        // Guard against non-positive denominators when the article is shorter
        // than the viewport (would otherwise yield negative/Infinity widths).
        const scrollable = Math.max(1, article.offsetHeight - window.innerHeight);
        const scrolled = Math.max(0, -articleRect.top);
        const progress = Math.min(100, (scrolled / scrollable) * 100);
        progressBar.style.width = progress + '%';
    }

    // Passive listener: the handler never calls preventDefault, so let the
    // browser keep scrolling off the main-thread critical path.
    window.addEventListener('scroll', updateReadingProgress, { passive: true });
    updateReadingProgress();
});
|
||
</script>
|
||
</body>
|
||
</html>
|