ALTER PROCEDURE dbo.usp_blg_vkj_0000002 AS
BEGIN
/* Here is an example of omitting data by linking two outer or inner joined tables
to each other instead of a root containing all possible keys for both tables.
This example also introduces a limitation of SQL as joining more than one table
on the same columns where one of the source record sets contains more than one record
per join criteria causes row duplication to occur. */
-------------------------------------------------------------------------
-- Create schema.
-------------------------------------------------------------------------
IF NOT EXISTS (SELECT * FROM sys.schemas WHERE name = N'BLOG2')
EXEC sys.sp_executesql N'CREATE SCHEMA [BLOG2] AUTHORIZATION [dbo]'
-------------------------------------------------------------------------
-- Create example tables.
-------------------------------------------------------------------------
IF NOT OBJECT_ID('BLOG2.Entity') IS NULL DROP TABLE BLOG2.Entity
CREATE TABLE BLOG2.Entity(EntityID integer, EntityName varchar(50))
-- populate Entity with 3 records.
INSERT INTO BLOG2.Entity(EntityID, EntityName )
SELECT EntityID = 1, EntityName = 'Entity1'
UNION SELECT EntityID = 2, EntityName = 'Entity2'
UNION SELECT EntityID = 3, EntityName = 'Entity3'
-- Create EntityBase1 and populate it with 3 records.
IF NOT OBJECT_ID('BLOG2.EntityBase1') IS NULL DROP TABLE BLOG2.EntityBase1
CREATE TABLE BLOG2.EntityBase1 (EntityID integer, Period integer)
INSERT INTO BLOG2.EntityBase1 (EntityID,Period )
SELECT Entity = 1, Period = 201101
UNION SELECT Entity = 1, Period = 201102
-- Create EntityBase2 and populate it with 3 records.
IF NOT OBJECT_ID('BLOG2.EntityBase2') IS NULL DROP TABLE BLOG2.EntityBase2
CREATE TABLE BLOG2.EntityBase2 (EntityID integer, FromPeriod integer, ThroughPeriod integer)
INSERT INTO BLOG2.EntityBase2 (EntityID,FromPeriod, ThroughPeriod )
SELECT Entity = 1, FromPeriod = 201101, ThroughPeriod = 201103
UNION SELECT Entity = 2, FromPeriod = 201104, ThroughPeriod = 201107
-- Create the 1st table for monetary data.
IF NOT OBJECT_ID('BLOG2.EntityDetail1') IS NULL DROP TABLE EntityDetail1
CREATE TABLE BLOG2.EntityDetail1 (EntityID integer, period integer, Amount money)
-- insert 3 records
INSERT INTO BLOG2.EntityDetail1 (EntityID, period, Amount)
SELECT BaseTable.EntityID , BaseTable.period , BaseTable.Amount
FROM (
SELECT EntityID = 1, period = 201101, Amount = 50
UNION ALL SELECT EntityID = 1, period = 201101, Amount = 60
UNION ALL SELECT EntityID = 1, period = 201101, Amount = 70
) BaseTable
-- Create the 2nd table for monetary data.
IF NOT OBJECT_ID('BLOG2.EntityDetail2') IS NULL DROP TABLE BLOG2.EntityDetail2
CREATE TABLE BLOG2.EntityDetail2(EntityID integer, period integer, Amount money)
-- insert 3 monetary records
INSERT INTO BLOG2.EntityDetail2(EntityID, period, Amount)
SELECT BaseTable.EntityID , BaseTable.period , BaseTable.Amount
FROM (
SELECT EntityID = 1, period = 201101, Amount = 50
UNION ALL SELECT EntityID = 1, period = 201101, Amount = 40
UNION ALL SELECT EntityID = 1, period = 201102, Amount = 30
) BaseTable
-------------------------------------------------------------------------
-- End of DML
-------------------------------------------------------------------------
/*
SELECT * FROM BLOG2.EntityBase1 BaseTable
EntityID Period
----------- -----------
1 201101
1 201102
(2 row(s) affected)
SELECT * FROM BLOG2.EntityDetail1 BaseTable
EntityID period Amount
----------- ----------- ---------------------
1 201101 50.00
1 201101 60.00
1 201101 70.00
(3 row(s) affected)
SELECT * FROM BLOG2.EntityDetail2 BaseTable
EntityID period Amount
----------- ----------- ---------------------
1 201101 50.00
1 201101 40.00
1 201102 30.00
(3 row(s) affected)
The next example will have a join causing omission of data. The join
LEFT JOIN BLOG2.EntityDetail2 Join2 WITH (NOLOCK)
ON Join1.EntityID = Join2.EntityID
AND Join1.Period = Join2.Period
should actually be coded to link to the BaseTable as
LEFT JOIN BLOG2.EntityDetail2 Join2 WITH (NOLOCK)
ON BaseTable.EntityID = Join2.EntityID
AND BaseTable.Period = Join2.Period
although that is not entirely correct as I have introduced an inherent SQL limitation in this example.
Here is our data.
--BLOG2.EntityBase1 BaseTable
EntityID Period
----------- -----------
1 201101
1 201102
--BLOG2.EntityDetail1 BaseTable
EntityID period Amount
----------- ----------- ---------------------
1 201101 50.00
1 201101 60.00
1 201101 70.00
--BLOG2.EntityDetail2 BaseTable
EntityID period Amount
----------- ----------- ---------------------
1 201101 50.00
1 201101 40.00
1 201102 30.00
Our EntityBase1 table only contains two records.
One for 201101 and one for 201102. Both for EntityId = 1
Our EntityDetail1 table contains 3 records for EntityId = 1. All three for period 201101.
Our EntityDetail2 table contains 3 records, 2 for 201101 and 1 for 201102.
In total we have 5 records for 201101 and 1 record for 201102 so in total we should have at most 6 records.
Due to invalid joins in the queries below we have 7 records returning however the 201102 data is not returned at all in the first query.
*/
SELECT
BaseTable.EntityID
, Period = BaseTable.Period
, Period1 =Join1.Period
, Join1Amount = Join1.Amount
, Period2 = Join2.Period
, Join2Amount = Join2.Amount
FROM BLOG2.EntityBase1 BaseTable WITH (NOLOCK)
LEFT JOIN BLOG2.EntityDetail1 Join1 WITH (NOLOCK)
ON BaseTable.EntityID = Join1.EntityID
AND BaseTable.Period = Join1.Period
LEFT JOIN BLOG2.EntityDetail2 Join2 WITH (NOLOCK)
ON Join1.EntityID = Join2.EntityID
AND Join1.Period = Join2.Period
/*
As you can see the values returned by this query do not reflect the actual data.
EntityID Period Period1 Join1Amount Period2 Join2Amount
----------- ----------- ----------- --------------------- ----------- ---------------------
1 201101 201101 50.00 201101 50.00
1 201101 201101 50.00 201101 40.00
1 201101 201101 60.00 201101 50.00
1 201101 201101 60.00 201101 40.00
1 201101 201101 70.00 201101 50.00
1 201101 201101 70.00 201101 40.00
1 201102 NULL NULL NULL NULL
(7 row(s) affected)
Now lets try to query the data a bit differently. This example will fix one thing wrong with the
prior query but will leave one more thing to be fixed. In this example a base record set containing
all possible keys (EntityID,Period) for both tables is created. These values are then linked to the
tables containing the actual data. There is still a problem with this query. As discussed in the last
example both EntityDetail1 and EntityDetail2 need to link on the same columns (EntityID,Period) which causes row duplication when either join returns more than one record and has identical join criteria.
In the following query a fix to join2 has been implemented as a join on the BaseTable instead of join1. Our 201102 data is now showing up however this is still not correct but let's see what the data is returned anyway. */
SELECT
BaseTable.EntityID
, AllPossible.Period
, AmountBaseTable = Join1.Amount
, AmountJoin1 = Join2.Amount
FROM BLOG2.EntityBase1 BaseTable WITH (NOLOCK)
-- All possible
LEFT JOIN (
SELECT EntityID , Period
FROM BLOG2.EntityDetail1
UNION -- Don't use UNION ALL as we want a distinct list to join on.
SELECT EntityID , Period
FROM BLOG2.EntityDetail2
) AllPossible
ON BaseTable.EntityID = AllPossible.EntityID
AND BaseTable.Period = AllPossible.Period
-- Detail 1
LEFT JOIN BLOG2.EntityDetail1 Join1 WITH (NOLOCK)
ON AllPossible.EntityID = Join1.EntityID
AND AllPossible.Period = Join1.Period
-- Detail 2
LEFT JOIN BLOG2.EntityDetail2 Join2 WITH (NOLOCK)
ON AllPossible.EntityID = Join2.EntityID
AND AllPossible.Period = Join2.Period
/* As can be seen we still have row duplication issues.
EntityID Period AmountBaseTable AmountJoin1
----------- ----------- --------------------- ---------------------
1 201101 50.00 50.00
1 201101 50.00 40.00
1 201101 60.00 50.00
1 201101 60.00 40.00
1 201101 70.00 50.00
1 201101 70.00 40.00
1 201102 NULL 30.00
(7 row(s) affected)
In this example all possible records have been rolled up into 'AllPossible'
and the tables containing the monetary amounts have been formatted as one table
by using the UNION ALL operator. We need to make sure to use the UNION ALL operator as we don't want to lose any data. A flag is supplied to denote which table the
values come from.
*/
SELECT
BaseTable.EntityID
, AllPossible.Period
, Join2.RecType
, Join2.Amount
FROM BLOG2.EntityBase1 BaseTable WITH (NOLOCK)
LEFT JOIN (
SELECT
EntityID
, Period
FROM BLOG2.EntityDetail1
UNION
SELECT
EntityID
, Period
FROM BLOG2.EntityDetail2
) AllPossible
ON BaseTable.EntityID = AllPossible.EntityID
AND BaseTable.Period = AllPossible.Period
LEFT JOIN (
SELECT
RecType = 1
, *
FROM BLOG2.EntityDetail1
UNION ALL
SELECT
RecType = 2
, *
FROM BLOG2.EntityDetail2
) Join2
ON AllPossible.EntityID = Join2.EntityID
AND AllPossible.Period = Join2.Period
/*
We now have only 6 records returned and they all display the correct values.
EntityID Period RecType Amount
----------- ----------- ----------- ---------------------
1 201101 1 50.00
1 201101 1 60.00
1 201101 1 70.00
1 201101 2 50.00
1 201101 2 40.00
1 201102 2 30.00
(6 row(s) affected)
Here is another version of the prior query. In this example we sum the values and display them side by side.
*/
SELECT
BaseTable.EntityID
, AllPossible.Period
, AmountBaseTable = SUM(CASE WHEN Join2.RecType = 1 THEN Join2.Amount ELSE 0 END)
, AmountJoin1 = SUM(CASE WHEN Join2.RecType = 2 THEN Join2.Amount ELSE 0 END)
FROM BLOG2.EntityBase1 BaseTable WITH (NOLOCK)
LEFT JOIN (
SELECT
EntityID
, Period
FROM BLOG2.EntityDetail1
UNION
SELECT
EntityID
, Period
FROM BLOG2.EntityDetail2
) AllPossible
ON BaseTable.EntityID = AllPossible.EntityID
AND BaseTable.Period = AllPossible.Period
LEFT JOIN (
SELECT
RecType = 1
, *
FROM BLOG2.EntityDetail1
UNION ALL
SELECT
RecType = 2
, *
FROM BLOG2.EntityDetail2
) Join2
ON AllPossible.EntityID = Join2.EntityID
AND AllPossible.Period = Join2.Period
GROUP BY BaseTable.EntityID , AllPossible.Period
/*
EntityID Period AmountBaseTable AmountJoin1
----------- ----------- --------------------- ---------------------
1 201101 180.00 90.00
1 201102 0.00 30.00
(2 row(s) affected)
Hopefully the examples I have provided will help the reader understand
how row duplication occurs in SQL statements and how to deal with
identical join criteria. If both EntityDetail tables had unique record
constraints on (entityid,period) they could have have been linked to
AllPossible without causing row duplication.
*/
END
GO
-- Another Example.
-------------------------------------------------------------------------------
-- Here is an example of ommiting data by linked two outer or inner joined tables
-- to each other instead of a root containing all possible keys for both tables.
-------------------------------------------------------------------------------
SELECT t1.InvestorID , t2.Period , t3.Period
FROM dbo.InvestorBase t1 WITH (NOLOCK)
LEFT JOIN dbo.InvestorDetail1 t2 WITH (NOLOCK) ON t1.InvestorID = t2.InvestorID
LEFT JOIN dbo.InvestorDetail2 t3 WITH (NOLOCK) ON t2.InvestorID = t3.InvestorID
AND t2.Period = t3.Period
SELECT t1.InvestorID , AllPossible.Period, Amount1 = t2.Amount, Amount2 = t3.Amount
FROM dbo.InvestorBase t1 WITH (NOLOCK)
LEFT JOIN (
SELECT InvestorID , Period FROM dbo.InvestorDetail1
SELECT InvestorID , Period FROM dbo.InvestorDetail2
) AllPossible
ON t1.InvestorID = AllPossible.InvestorID
AND t1.Period = AllPossible.Period
LEFT JOIN dbo.InvestorDetail1 t2 WITH (NOLOCK) ON AllPossible.InvestorID = t2.InvestorID
AND AllPossible.Period = t2.Period
LEFT JOIN dbo.InvestorDetail2 t3 WITH (NOLOCK) ON AllPossible.InvestorID = t3.InvestorID
AND AllPossible.Period = t3.Period
No comments:
Post a Comment