Joining a Dim to fact table where the dim table key exists in multiple fact columns

  • This has probably been asked 100 times but I haven't found anything good on it.

    Say you have a fact table with a few columns that all reference the same key column in a dimension table, you want to write a view to return the information for those keys, what is the best way?

    hopefully this will explain better...

    USE MyTestDB;

    GO

    SET NOCOUNT ON;

    IF OBJECT_ID ('dbo.FactTemp' ,'U') IS NOT NULL

    DROP TABLE dbo.FactTemp;

    IF OBJECT_ID ('dbo.DimTemp', 'U') IS NOT NULL

    DROP TABLE dbo.DimTemp;

    CREATE TABLE dbo.DimTemp

    ( DimTempID INT IDENTITY(1,1) NOT NULL

    , DimTempName NVARCHAR(100) NOT NULL

    , CONSTRAINT PK_DimTemp_DimTempID PRIMARY KEY (DimTempID)

    );

    CREATE TABLE dbo.FactTemp

    ( FactTempID INT IDENTITY (1,1) NOT NULL

    , CaseID INT NOT NULL

    , AssignedTo INT NULL

    , LoggedBy INT NOT NULL

    , LoggedFor INT NOT NULL

    , DateLogged DATETIME NOT NULL

    , CONSTRAINT PK_FactTemp_FactTempID PRIMARY KEY (FactTempID)

    , CONSTRAINT FK_FactTemp_AssignedTo FOREIGN KEY (AssignedTo) REFERENCES dbo.DimTemp(DimTempID)

    , CONSTRAINT FK_FactTemp_LoggedBy FOREIGN KEY (LoggedBy) REFERENCES dbo.DimTemp(DimTempID)

    , CONSTRAINT FK_FactTemp_LoggedFor FOREIGN KEY (LoggedFor) REFERENCES dbo.DimTemp(DimTempID)

    );

    INSERT dbo.DimTemp(DimTempName)

    VALUES ('John'), ('Andrew'), ('Kev'), ('Brian'), ('Keith'), ('Lisa'), ('Laura'), ('Jane'), ('Alice');

    --SELECT * FROM dbo.DimTemp;

    INSERT dbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES (1, NULL, 2, 3, (DATEADD(HOUR, -5, DATEADD(DAY, -1, GETDATE())))); -- LOGG FOR YESTERDAY, 5 HOURS EARLIER

    INSERT dbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES (1, 4, 2, 3, (DATEADD(DAY, -1, GETDATE()))); -- LOGG FOR YESTERDAY

    INSERT dbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES (1, 1, 2, 3, GETDATE());

    INSERT dbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES (2, 9, 8, 7, (DATEADD(HOUR, -5, DATEADD(DAY, -3, GETDATE())))); -- LOGG FOR 3 DAYS AGO, 5 HOURS EARLIER

    INSERT dbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES (2, 5, 8, 7, (DATEADD(DAY, -3, GETDATE()))); -- LOGG FOR 3 DAYS AGO

    INSERT dbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES (2, 6, 8, 7, GETDATE());

    --Cross over

    INSERT dbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES (3, 4, 2, 7, (DATEADD(HOUR, -5, DATEADD(DAY, -2, GETDATE())))); -- LOGG FOR 3 DAYS AGO, 5 HOURS EARLIER

    INSERT dbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES (3, NULL, 2, 7, (DATEADD(DAY, -2, GETDATE()))); -- LOGG FOR 3 DAYS AGO

    INSERT dbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES (3, 6, 2, 7, GETDATE());

    --Cross over with null

    INSERT dbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES (4, 4, 8, 3, (DATEADD(HOUR, -5, DATEADD(DAY, -2, GETDATE())))); -- LOGG FOR 3 DAYS AGO, 5 HOURS EARLIER

    INSERT dbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES (4, NULL, 8, 3, (DATEADD(DAY, -2, GETDATE()))); -- LOGG FOR 3 DAYS AGO

    INSERT dbo.FactTemp(CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged)

    VALUES (4, NULL, 8, 3, GETDATE());

    --SELECT * FROM dbo.FactTemp ORDER BY CaseID ASC, DateLogged DESC;

    --Get lates record;

    IF OBJECT_ID ('tempdb..#GetLatest' ,'U') IS NOT NULL

    DROP TABLE dbo.#GetLatest;

    SELECT FactTempID

    , CaseID

    , AssignedTo

    , LoggedBy

    , LoggedFor

    , DateLogged

    , ROW_NUMBER() OVER(PARTITION BY CaseID ORDER BY CaseID ASC, DateLogged DESC) as RwNum

    INTO dbo.#GetLatest

    FROM dbo.FactTemp;

    CREATE CLUSTERED INDEX CIX_#GetLatest_All ON dbo.#GetLatest (FactTempID, CaseID, AssignedTo, LoggedBy, LoggedFor, DateLogged, RwNum);

    --GO

    DBCC FREESESSIONCACHE

    DBCC FREEPROCCACHE

    DBCC DROPCLEANBUFFERS

    GO

    SELECT FactTempID

    , CaseID

    , AssignedTo

    , dtTo.DimTempName as AssignedToName

    , LoggedBy

    , dtBy.DimTempID as LoggedByName

    , LoggedFor

    , dtFor.DimTempID as LoggedForName

    , DateLogged

    FROM dbo.#GetLatest as gl

    LEFT JOIN dbo.DimTemp as dtTo

    ON gl.AssignedTo = dtTo.DimTempID

    LEFT JOIN dbo.DimTemp as dtBy

    ON gl.LoggedBy = dtBy.DimTempID

    LEFT JOIN dbo.DimTemp as dtFor

    ON gl.LoggedFor = dtFor.DimTempID

    WHERE RwNum = 1

    GO

    SELECT gl.FactTempID

    , gl.CaseID

    --, c.AssignedTo

    , MAX(CASE WHEN gl.AssignedTo = dt.DimTempID THEN dt.DimTempName ELSE NULL END) AS AssignedToName

    --, c.LoggedBy

    , MAX(CASE WHEN gl.LoggedBy = dt.DimTempID THEN dt.DimTempName ELSE NULL END) AS LoggedByName

    --, c.LoggedFor

    , MAX(CASE WHEN gl.LoggedFor = dt.DimTempID THEN dt.DimTempName ELSE NULL END) AS AssignedToName

    --, c.DateLogged

    FROM dbo.#GetLatest as gl

    JOIN dbo.DimTemp as dt

    ON (gl.AssignedTo = dt.DimTempID

    OR gl.LoggedBy = dt.DimTempID

    OR gl.LoggedFor = dt.DimTempID)

    WHERE RwNum = 1

    GROUP BY FactTempID

    , CaseID;

    GO

    I'm using very small data at the moment, and the query plan and statistics don't really say which way. Has anyone encountered this and found the best solution when the data grows? I'm a bit dubious about the OR join as I seem to remember having a problem with it years ago.

    Thanks,

  • Your fact key columns should always have matching entries in their associated dim tables, even if that entry is -1 (unknown, N/A, or whatever default you choose).

    Once you have this in place, you can always use inner joins.

    If you haven't even tried to resolve your issue, please don't expect the hard-working volunteers here to waste their time providing links to answers which you could easily have found yourself.

  • For production they will, at the the moment I was looking at the issue of joining into the multiple matching columns

Viewing 3 posts - 1 through 2 (of 2 total)

You must be logged in to reply to this topic. Login to reply