Looking for a better query (I think I'm in a box)

Please start any new threads on our new site at https://forums.sqlteam.com. We've got lots of great SQL Server experts to answer whatever question you can come up with.

All Forums

SQL Server 2000 Forums

Transact-SQL (2000)

Looking for a better query (I think I'm in a box)

Author

Topic

souLTower
Starting Member

39 Posts

Posted - 2009-05-14 : 11:45:51

I am supporting a legacy financial app. There are some performance issues that I am trying to nail down. There are 2 table structures like the one I am demonstrating below. No matter what I have tried I have not been able to get the query time to a reasonable point. Any thoughts or suggestions?


-- Demonstrate the performance issues associated with the current design
-- and try to improve the performance
set nocount on

-- Create the costs table
if(object_id('tempdb..#costs')) is not null drop table #costs
create table #costs(costID int identity(1, 1), itemID int, stage int, itemCost float, externalID int)
create clustered index idx_costs on #costs(itemID, stage)
GO

-- Create the distribution table
if(object_id('tempdb..#dist')) is not null drop table #dist
create table #dist(distID int identity(1, 1), distAmount int, itemID int, stage int)
create clustered index idx_dist on #dist(itemID, stage)
GO

-- Create child 1
if(object_id('tempdb..#child1')) is not null drop table #child1
create table #child1(childID int identity(1, 1), distID int, externalID int)
create clustered index idx_child1 on #child1(distID)
GO

-- Create child 2
if(object_id('tempdb..#child2')) is not null drop table #child2
create table #child2(childID int identity(1, 1), distID int, externalID int)
create clustered index idx_child2 on #child2(distID)
GO


-- Populate with some data
declare @counter int, @base int, @stage int
select @counter = 500000, @base = 10000, @stage = 5
while(@counter >= 0) begin
  insert #costs (itemID, stage, itemCost, externalID) 
    select rand() * @base, rand() * @stage, 100, rand() * @base

  insert #dist (itemID, stage, distAmount) 
    select rand() * @base, rand() * @stage, 100

  if(@counter % 2 = 0) insert #child1(distID, externalID) values(@@identity, 100)
  else                 insert #child2(distID, externalID) values(@@identity, 100)
  set @counter = @counter - 1
end


-- This is the typical query used to get the cost and dist information
-- Typically it takes 3 - 7 seconds
select * from 
#costs C inner join #dist D on C.itemID = D.itemID and C.stage = D.stage
left join #child1 D1 on D.distID = D1.distID
left join #child2 D2 on D.distID = D2.distID
where C.itemID between 400 and 700


-- The sub query method usually is faster but has a higher number of reads
Select C.*, D.*, 
  (Select C1.externalID from #child1 C1 where C1.distID = D.distID) as external1, 
  (Select C2.externalID from #child2 C2 where C2.distID = D.distID) as external2
from  
#costs C inner join #dist D on C.itemID = D.itemID and C.stage = D.stage
where C.itemID between 400 and 700


-- Try using an inline view
select * from 
#costs C inner join 
(
  Select D.itemID, D.stage, D.distAmount, 
    D1.externalID as external1, D2.externalID as external2 from #dist D 
  left join #child1 D1 on D.distID = D1.distID
  left join #child2 D2 on D.distID = D2.distID
  where D.itemID in (select itemID from #costs where itemID between 400 and 700)
) D on C.itemID = D.itemID and C.stage = D.stage
where C.itemID between 400 and 700


-- A different type of inline view
select * from 
#costs C inner join #dist D on C.itemID = D.itemID and C.stage = D.stage
inner join 
(
  select distID, externalID as external1, null as external2 from #child1
  union all
  select distID, null as external1, externalID as external2 from #child2
) D1 on D.distID = D1.distID
where C.itemID between 400 and 700


-- Another different type of inline view
select * from 
#costs C inner join 
(
  Select D.itemID, D.stage, D.distAmount, D1.externalID as external1, null as external2
    from #dist D inner join #child1 D1 on D.distID = D1.distID
  union all  
  Select D.itemID, D.stage, D.distAmount, null as external1, D1.externalID as external2
    from #dist D inner join #child2 D1 on D.distID = D1.distID
) D on C.itemID = D.itemID and C.stage = D.stage
where C.itemID between 400 and 700




-- Radical idea.  What if I de-normalize so that the child data is part of the
-- parent table.
alter table #dist add external1 int, external2 int
GO
update D set D.external1 = D1.externalID from #dist D inner join #child1 D1 on D.distID = D1.distID
update D set D.external1 = D1.externalID from #dist D inner join #child2 D1 on D.distID = D1.distID


-- With the new table structure this should fly right?
-- Takes the same as the sub query version.  Not worth the risk in production
select * from 
#costs C inner join #dist D on C.itemID = D.itemID and C.stage = D.stage
where C.itemID between 400 and 700

God Bless

pootle_flump

1064 Posts

Posted - 2009-05-15 : 08:15:29

Some questions.
How come there are no primary keys\ unique indexes? How come the clustered indexes are non-unique?
If you change the indexes on the child tables to unique you get better times and better plans (merge instead of hash joins).
The query returns nearly 150k rows - do you really care if this is done a couple of seconds faster?
Also, maybe you abstracted the schema so I'm misreading, but can't child1 & child2 really be a single table?

Subscribe to SQLTeam.com

SQLTeam.com Articles via RSS

SQLTeam.com Weblog via RSS

- Advertisement -

Resources