31

Refactoring SQL for Performance

Embed Size (px)

Citation preview

loan_nbr customer_nbr code value

1 1 amount 1500.00

1 1 date 20080110

1 1 type personal

2 2 amount 3500.00

2 2 date 20080215

2 2 type personal

-- Entity-attribute-value layout for loans: each row stores a single
-- attribute (code) of one loan/customer pair as a character string (value).
-- The sample rows in this file use the codes 'amount', 'date' and 'type'.
CREATE TABLE EAV_Loans
(
    loan_nbr     INT          NOT NULL,
    customer_nbr INT          NOT NULL,
    code         VARCHAR(30)  NOT NULL,   -- attribute name
    value        VARCHAR(200),            -- attribute value kept as text; no NOT NULL declared

    -- At most one row per attribute for a given loan/customer pair.
    CONSTRAINT pk_eav_loans
        PRIMARY KEY (loan_nbr, customer_nbr, code)
);

-- Customers with personal loans over 1000.00 for the period
-- Jan 1, 2008 through Jan 31, 2008 (half-open range: >= Jan 1, < Feb 1).
--
-- EAV_Loans holds one row per attribute, so the table is joined to itself
-- once per attribute needed: A supplies 'date', B 'amount', C 'type'.
--
-- Every CAST is wrapped in a CASE that tests the row's own code. The engine
-- is free to evaluate a conversion before the code = '...' filter has been
-- applied; an unguarded CAST could then be attempted on a value such as
-- 'personal' and abort the query with a conversion error. The CASE makes
-- the conversion run only for rows of the matching attribute.
SELECT A.loan_nbr,
       A.customer_nbr,
       CASE WHEN A.code = 'date'
            THEN CAST(A.value AS DATETIME) END AS loan_date,
       CASE WHEN B.code = 'amount'
            THEN CAST(B.value AS DECIMAL(15, 2)) END AS loan_amount
FROM EAV_Loans AS A
INNER JOIN EAV_Loans AS B
   ON A.loan_nbr = B.loan_nbr
  AND A.customer_nbr = B.customer_nbr
INNER JOIN EAV_Loans AS C
   ON A.loan_nbr = C.loan_nbr
  AND A.customer_nbr = C.customer_nbr
WHERE A.code = 'date'
  AND CASE WHEN A.code = 'date'
           THEN CAST(A.value AS DATETIME) END >= '20080101'
  AND CASE WHEN A.code = 'date'
           THEN CAST(A.value AS DATETIME) END < '20080201'
  AND B.code = 'amount'
  AND CASE WHEN B.code = 'amount'
           THEN CAST(B.value AS DECIMAL(15, 2)) END > 1000.00
  AND C.code = 'type'
  AND C.value = 'personal';

-- Personal loans over 1000.00 dated from Jan 1, 2008 up to (not including)
-- Feb 1, 2008, read from EAV_Loans through one derived table per attribute:
-- A exposes the 'date' rows as loan_date, B the 'amount' rows as
-- loan_amount, C the 'type' rows as loan_type.
--
-- A derived table does not force its WHERE clause to run before its select
-- list, so each CAST is guarded by a CASE on code. Without the guard the
-- conversion could be attempted on rows of another attribute and fail.
SELECT A.loan_nbr,
       A.customer_nbr,
       A.loan_date,
       B.loan_amount
FROM (SELECT loan_nbr,
             customer_nbr,
             CASE WHEN code = 'date'
                  THEN CAST(value AS DATETIME) END AS loan_date
      FROM EAV_Loans
      WHERE code = 'date') AS A
INNER JOIN (SELECT loan_nbr,
                   customer_nbr,
                   CASE WHEN code = 'amount'
                        THEN CAST(value AS DECIMAL(15, 2)) END AS loan_amount
            FROM EAV_Loans
            WHERE code = 'amount') AS B
   ON A.loan_nbr = B.loan_nbr
  AND A.customer_nbr = B.customer_nbr
INNER JOIN (SELECT loan_nbr,
                   customer_nbr,
                   value AS loan_type
            FROM EAV_Loans
            WHERE code = 'type') AS C
   ON A.loan_nbr = C.loan_nbr
  AND A.customer_nbr = C.customer_nbr
WHERE A.loan_date >= '20080101'
  AND A.loan_date < '20080201'
  AND B.loan_amount > 1000.00
  AND C.loan_type = 'personal';

-- Personal loans over 1000.00 dated from Jan 1, 2008 up to (not including)
-- Feb 1, 2008. EAV_Loans is referenced once: the attribute rows of each
-- loan/customer pair are folded into one row with MAX(CASE ...), and the
-- filters are applied to the pivoted columns.
SELECT L.loan_nbr,
       L.customer_nbr,
       L.loan_date,
       L.loan_amount
FROM (SELECT loan_nbr,
             customer_nbr,
             MAX(CASE WHEN code = 'date'
                      THEN CAST(value AS DATETIME) END) AS loan_date,
             MAX(CASE WHEN code = 'amount'
                      THEN CAST(value AS DECIMAL(15, 2)) END) AS loan_amount,
             MAX(CASE WHEN code = 'type'
                      THEN value END) AS loan_type
      FROM EAV_Loans
      GROUP BY loan_nbr, customer_nbr) AS L
WHERE L.loan_date >= '20080101'
  AND L.loan_date < '20080201'
  AND L.loan_amount > 1000.00
  AND L.loan_type = 'personal';

loan_nbr customer_nbr loan_date loan_amount loan_type

1 1 2008-01-10 00:00:00.000 1500.00 personal

2 2 2008-02-15 00:00:00.000 3500.00 personal

-- Normalized loans table: one row per loan, one typed column per attribute.
CREATE TABLE Loans
(
    loan_nbr     INT            NOT NULL,
    customer_nbr INT            NOT NULL,
    loan_date    DATETIME       NOT NULL,
    loan_amount  DECIMAL(15, 2) NOT NULL,
    loan_type    VARCHAR(10)    NOT NULL,

    CONSTRAINT pk_loans
        PRIMARY KEY (loan_nbr),

    -- Only these two loan types are accepted.
    CONSTRAINT ck_loan_type
        CHECK (loan_type IN ('personal', 'business'))
);

-- Convert EAV table to normalized:
-- collapse the attribute rows of each loan/customer pair in EAV_Loans into
-- a single typed row and load it into Loans.
INSERT INTO Loans
    (loan_nbr, customer_nbr, loan_date, loan_amount, loan_type)
SELECT E.loan_nbr,
       E.customer_nbr,
       MAX(CASE WHEN E.code = 'date'
                THEN CAST(E.value AS DATETIME) END) AS loan_date,
       MAX(CASE WHEN E.code = 'amount'
                THEN CAST(E.value AS DECIMAL(15, 2)) END) AS loan_amount,
       MAX(CASE WHEN E.code = 'type'
                THEN E.value END) AS loan_type
FROM EAV_Loans AS E
GROUP BY E.loan_nbr, E.customer_nbr;

-- Customers with personal loans over 1000.00
-- for period Jan 1, 2008 through Jan 31, 2008
-- (half-open date range: on or after Jan 1, before Feb 1).
SELECT L.loan_nbr,
       L.customer_nbr,
       L.loan_date,
       L.loan_amount
FROM Loans AS L
WHERE L.loan_type = 'personal'
  AND L.loan_amount > 1000.00
  AND L.loan_date >= '20080101'
  AND L.loan_date < '20080201';

-- Replacement view for legacy code:
-- presents Loans in the (loan_nbr, customer_nbr, code, value) shape by
-- emitting three rows per loan, one each for 'date', 'amount' and 'type'.
-- loan_date is rendered with CONVERT style 112 (yyyymmdd).
--
-- UNION ALL is used instead of UNION: the branches carry different code
-- literals and loan_nbr is the primary key of Loans, so no two rows can be
-- equal and the duplicate-removal step of UNION would be wasted work.
CREATE VIEW EAV_Loans
    (loan_nbr, customer_nbr, code, value)
AS
SELECT loan_nbr,
       customer_nbr,
       CAST('date' AS VARCHAR(30)),
       CONVERT(VARCHAR(200), loan_date, 112)
FROM Loans
UNION ALL
SELECT loan_nbr,
       customer_nbr,
       CAST('amount' AS VARCHAR(30)),
       CAST(loan_amount AS VARCHAR(200))
FROM Loans
UNION ALL
SELECT loan_nbr,
       customer_nbr,
       CAST('type' AS VARCHAR(30)),
       CAST(loan_type AS VARCHAR(200))
FROM Loans;

loan_nbr loan_date loan_amount loan_type rk

3 2008-03-11 00:00:00.000 5000.00 business 1

6 2008-03-27 00:00:00.000 4000.00 business 2

7 2008-04-10 00:00:00.000 3500.00 business 3

4 2008-03-12 00:00:00.000 2000.00 personal 1

8 2008-04-12 00:00:00.000 2000.00 personal 2

1 2008-01-01 00:00:00.000 1500.00 personal 3

5 2008-03-25 00:00:00.000 1200.00 personal 4

2 2008-02-15 00:00:00.000 1000.00 personal 5

-- Rank loans within each loan type by amount, largest first, using a
-- correlated count: rk is the number of loans of the same type that have a
-- larger amount, or the same amount and a loan number not greater than the
-- current one (the current row counts itself, so ranks start at 1).
SELECT Cur.loan_nbr,
       Cur.loan_date,
       Cur.loan_amount,
       Cur.loan_type,
       (SELECT COUNT(*)
        FROM Loans AS Oth
        WHERE Oth.loan_type = Cur.loan_type
          AND (Oth.loan_amount > Cur.loan_amount
               OR (Oth.loan_amount = Cur.loan_amount
                   AND Oth.loan_nbr <= Cur.loan_nbr))) AS rk
FROM Loans AS Cur
ORDER BY Cur.loan_type, rk;

-- Rank loans within each loan type by amount, largest first; loans with
-- equal amounts are ordered by loan number.
SELECT L.loan_nbr,
       L.loan_date,
       L.loan_amount,
       L.loan_type,
       ROW_NUMBER() OVER (PARTITION BY L.loan_type
                          ORDER BY L.loan_amount DESC,
                                   L.loan_nbr ASC) AS rk
FROM Loans AS L
ORDER BY L.loan_type, rk;

loan_nbr customer_nbr loan_date loan_amount loan_type

2 2 2008-02-15 00:00:00.000 1000.00 personal

3 1 2008-03-11 00:00:00.000 4500.00 business

4 3 2008-03-12 00:00:00.000 2000.00 personal

loan_nbr customer_nbr loan_date loan_amount loan_type

1 1 2008-01-01 00:00:00.000 1500.00 personal

2 2 2008-02-15 00:00:00.000 1000.00 personal

3 1 2008-03-11 00:00:00.000 5000.00 business

loan_nbr customer_nbr loan_date loan_amount loan_type

2 2 2008-02-15 00:00:00.000 1000.00 personal

3 1 2008-03-11 00:00:00.000 4500.00 business

4 3 2008-03-12 00:00:00.000 2000.00 personal

-- Update changed:
-- copy loan_amount from DailyChangedLoans into Loans for every loan number
-- present in both tables whose amounts differ. The WHERE EXISTS repeats the
-- subquery's condition so that only those rows are touched.
UPDATE Loans
SET loan_amount =
        (SELECT Chg.loan_amount
         FROM DailyChangedLoans AS Chg
         WHERE Chg.loan_nbr = Loans.loan_nbr
           AND Chg.loan_amount <> Loans.loan_amount)
WHERE EXISTS
        (SELECT *
         FROM DailyChangedLoans AS Chg
         WHERE Chg.loan_nbr = Loans.loan_nbr
           AND Chg.loan_amount <> Loans.loan_amount);

-- Insert new loans:
-- add every DailyChangedLoans row whose loan number is not yet in Loans.
INSERT INTO Loans
    (loan_nbr, customer_nbr, loan_date, loan_amount, loan_type)
SELECT Src.loan_nbr,
       Src.customer_nbr,
       Src.loan_date,
       Src.loan_amount,
       Src.loan_type
FROM DailyChangedLoans AS Src
WHERE NOT EXISTS
        (SELECT *
         FROM Loans AS Tgt
         WHERE Tgt.loan_nbr = Src.loan_nbr);

-- Remove deleted:
-- drop every Loans row whose loan number no longer appears in
-- DailyChangedLoans.
DELETE FROM Loans
WHERE NOT EXISTS
        (SELECT *
         FROM DailyChangedLoans AS Src
         WHERE Src.loan_nbr = Loans.loan_nbr);

-- Using a single MERGE statement:
-- synchronize Loans (target) with DailyChangedLoans (source) on loan_nbr.
--   * matched, amounts differ -> overwrite loan_amount from the source
--   * in source only          -> insert the source row
--   * in target only          -> delete the target row
-- The INSERT names its target columns explicitly so the statement does not
-- depend on the physical column order of Loans.
MERGE INTO Loans AS L
USING DailyChangedLoans AS D
   ON D.loan_nbr = L.loan_nbr
WHEN MATCHED
     AND L.loan_amount <> D.loan_amount
  THEN UPDATE SET loan_amount = D.loan_amount
WHEN NOT MATCHED BY TARGET
  THEN INSERT (loan_nbr,
               customer_nbr,
               loan_date,
               loan_amount,
               loan_type)
       VALUES (D.loan_nbr,
               D.customer_nbr,
               D.loan_date,
               D.loan_amount,
               D.loan_type)
WHEN NOT MATCHED BY SOURCE
  THEN DELETE;

loan_nbr customer_nbr loan_date loan_amount loan_type

1 1 2008-01-01 00:00:00.000 1500.00 personal

2 2 2008-01-02 00:00:00.000 1000.00 personal

3 1 2008-01-03 00:00:00.000 5000.00 business

4 3 2008-01-12 00:00:00.000 2000.00 personal

5 4 2008-01-13 00:00:00.000 1200.00 personal

6 3 2008-01-29 00:00:00.000 4000.00 business

7 5 2008-01-30 00:00:00.000 3500.00 business

8 2 2008-01-31 00:00:00.000 2000.00 personal

start_date end_date

2008-01-01 00:00:00.000 2008-01-03 00:00:00.000

2008-01-12 00:00:00.000 2008-01-13 00:00:00.000

2008-01-29 00:00:00.000 2008-01-31 00:00:00.000

-- Find last date for date range
-- and use as grouping factor:
-- for each loan date, base is the earliest loan date on or after it that
-- has no loan dated exactly one day later. All dates sharing a base are
-- grouped together and reported as one start_date/end_date pair.
SELECT MIN(Ranges.loan_date) AS start_date,
       MAX(Ranges.loan_date) AS end_date
FROM (SELECT Cur.loan_date,
             (SELECT MIN(Tail.loan_date)
              FROM Loans AS Tail
              WHERE Tail.loan_date >= Cur.loan_date
                AND NOT EXISTS
                      (SELECT *
                       FROM Loans AS Nxt
                       WHERE Nxt.loan_date =
                             DATEADD(DAY, 1, Tail.loan_date))) AS base
      FROM Loans AS Cur) AS Ranges
GROUP BY Ranges.base;

-- Preparation for solution:
-- list each loan date next to its day count from 1900-01-01 and its
-- position when the dates are numbered in ascending order.
SELECT L.loan_date,
       DATEDIFF(DAY, '19000101', L.loan_date) AS days_since_base_date,
       ROW_NUMBER() OVER (ORDER BY L.loan_date ASC) AS rn
FROM Loans AS L;

loan_date days_since_base_date rn

2008-01-01 00:00:00.000 39446 1

2008-01-02 00:00:00.000 39447 2

2008-01-03 00:00:00.000 39448 3

2008-01-12 00:00:00.000 39457 4

2008-01-13 00:00:00.000 39458 5

2008-01-29 00:00:00.000 39474 6

2008-01-30 00:00:00.000 39475 7

2008-01-31 00:00:00.000 39476 8

-- Solution with a ranking function:
-- within a run of consecutive days both the day number (days since
-- 1900-01-01) and the rank of that day number grow by one per day, so
-- their difference (base) is constant for the run and changes at every
-- gap. Grouping by base therefore yields one row per run.
--
-- DENSE_RANK over the day number is used rather than ROW_NUMBER over
-- loan_date: several loans falling on the same calendar day then share one
-- rank, so they stay in the same run instead of shifting base and
-- splitting it. When every loan is on a distinct day the two functions
-- number the rows identically.
SELECT MIN(loan_date) AS start_date,
       MAX(loan_date) AS end_date
FROM (SELECT loan_date,
             DATEDIFF(DAY, '19000101', loan_date) -
             DENSE_RANK() OVER(ORDER BY DATEDIFF(DAY, '19000101', loan_date)) AS base
      FROM Loans) AS L
GROUP BY base;