Data validation - special characters - Dual Test - DBT
21/02/2022 18:02
;with data_source
As
(
select
Translate(NEW_MISMATCH_VAL,
'[à, è, ì, ò, ù, À, È, Ì, Ò, Ù, á, é, í, ó, ú, ý, Á, É, Í, Ó, Ú, Ý, â, ê, î, ô, û, Â, Ê, Î, Ô, Û, ã, ñ, õ, Ã, Ñ, Õ, ä, ë, ï, ö, ü, ÿ, Ä, Ë, Ï, Ö, Ü, Ÿ, å, Å, ç, Ç]',
'[a, e, i, o, u, A, E, I, O, U, a, e, i, o, u, y, A, E, I, O, U, Y, a, e, i, o, u, A, E, I, O, U, a, n, o, A, N, O, a, e, i, o, u, y, A, E, I, O, U, Y, a, A, c, C]'
) as NEW_MISMATCH_VAL_NO_ACCENTS,
Translate(OLD_MISMATCH_VAL,
'[à, è, ì, ò, ù, À, È, Ì, Ò, Ù, á, é, í, ó, ú, ý, Á, É, Í, Ó, Ú, Ý, â, ê, î, ô, û, Â, Ê, Î, Ô, Û, ã, ñ, õ, Ã, Ñ, Õ, ä, ë, ï, ö, ü, ÿ, Ä, Ë, Ï, Ö, Ü, Ÿ, å, Å, ç, Ç]',
'[a, e, i, o, u, A, E, I, O, U, a, e, i, o, u, y, A, E, I, O, U, Y, a, e, i, o, u, A, E, I, O, U, a, n, o, A, N, O, a, e, i, o, u, y, A, E, I, O, U, Y, a, A, c, C]'
) as OLD_MISMATCH_VAL_NO_ACCENTS,
Regexp_replace(
regexp_replace(
LEFT(NEW_MISMATCH_VAL_NO_ACCENTS, len(OLD_MISMATCH_VAL)),
'[^\x00-\x7F]'
),
'[^a-zA-Z0-9]+'
) as NEW_MISMATCH_NO_SPECIAL_CHARACTER,
Regexp_replace(
regexp_replace(
OLD_MISMATCH_VAL_NO_ACCENTS,
'[^\x00-\x7F]'
),
'[^a-zA-Z0-9]+'
) as OLD_MISMATCH_NO_SPECIAL_CHARACTER,
HASH(LEFT(NEW_MISMATCH_VAL, len(OLD_MISMATCH_VAL))) as NEW_MISMATCH_HASH,
HASH(OLD_MISMATCH_VAL) as OLD_MISMATCH_HASH,
HASH(NEW_MISMATCH_NO_SPECIAL_CHARACTER) as NEW_MISMATCH_HASH_NS,
HASH(OLD_MISMATCH_NO_SPECIAL_CHARACTER) as OLD_MISMATCH_HASH_NS,
ascii(RIGHT(NEW_MISMATCH_VAL,1)) as total_NEW_MISMATCH_LAST_C,
ascii(RIGHT(OLD_MISMATCH_VAL,1)) as OLD_MISMATCH_VAL_LAST_C,
ascii(left(NEW_MISMATCH_VAL,1)) as FIRST_C_NEW_MISMATCH,
ascii(leFT(OLD_MISMATCH_VAL,1)) as FIRST_C_OLD_MISMATCH_VAL,
len(NEW_MISMATCH_VAL) as Size_NEW_MISMATCH_VAL,
len(OLD_MISMATCH_VAL) as Size_OLD_MISMATCH_VAL,
Case when NEW_MISMATCH_HASH_NS = OLD_MISMATCH_HASH_NS and Size_NEW_MISMATCH_VAL <> Size_OLD_MISMATCH_VAL then 1 else 0 End as IsTruncating,
Case when NEW_MISMATCH_HASH_NS <> OLD_MISMATCH_HASH_NS then 1 else 0 End as IsNew_record_due_to_timing_of_build,
Case when NEW_MISMATCH_HASH_NS = OLD_MISMATCH_HASH_NS and NEW_MISMATCH_HASH <> OLD_MISMATCH_HASH and not(IsTruncating = 1) then 1 else 0 End as IsCharacter_set_conversion,
Case when NEW_MISMATCH_HASH_NS = OLD_MISMATCH_HASH_NS and Size_NEW_MISMATCH_VAL <> Size_OLD_MISMATCH_VAL then 'Truncating'
when NEW_MISMATCH_HASH_NS <> OLD_MISMATCH_HASH_NS then 'Timing of build'
when NEW_MISMATCH_HASH_NS = OLD_MISMATCH_HASH_NS and NEW_MISMATCH_HASH <> OLD_MISMATCH_HASH and not(IsTruncating = 1) then 'Character set conversion'
else 'Others' End as IssueType,
*
from DEV_AFG.dbt_test__audit.dual_warehouse_test_WC_OPTY_PE_4e280d75455916c5c5824770948fb371 a
--where MISMATCH_REASON = 'Missing in source'
-- and substr(old_mismatch_val, 1, length(new_mismatch_val)) <> new_mismatch_val --<< Change it to the column you want to validate
-- and isNEW_RECORD_DUE_TO_TIMING_OF_BUILD = 1
-- select * from DEV_AFG.dbt_test__audit.dual_warehouse_test_WC_ASSET_P_f9eda9b76a6c2c7ec42df494baee112b
)
--select _fivetran_synced,* from afg.raw_flex_siebel.S_EMPLOYEE_X where row_id in (select "integration_id" from data_source)
select MISMATCH_REASON,
count(MISMATCH_REASON) as Discrepancies,
count_if(IsTruncating=1) as Total_Truncation_in_Oracle_warehouse,
count_if(NEW_MISMATCH_HASH_NS <> OLD_MISMATCH_HASH_NS) as Total_New_record_due_to_timing_of_build,
count_if(NEW_MISMATCH_HASH_NS = OLD_MISMATCH_HASH_NS and NEW_MISMATCH_HASH <> OLD_MISMATCH_HASH and not(IsTruncating = 1)) as Total_character_set_conversion
from data_source
where MISMATCH_REASON NOT IN ('PERSON_WID','OPTY_WID')
group by MISMATCH_REASON
order by 2 desc;
;with data_source
As
(
select
Translate(NEW_MISMATCH_VAL,
'[à, è, ì, ò, ù, À, È, Ì, Ò, Ù, á, é, í, ó, ú, ý, Á, É, Í, Ó, Ú, Ý, â, ê, î, ô, û, Â, Ê, Î, Ô, Û, ã, ñ, õ, Ã, Ñ, Õ, ä, ë, ï, ö, ü, ÿ, Ä, Ë, Ï, Ö, Ü, Ÿ, å, Å, ç, Ç]',
'[a, e, i, o, u, A, E, I, O, U, a, e, i, o, u, y, A, E, I, O, U, Y, a, e, i, o, u, A, E, I, O, U, a, n, o, A, N, O, a, e, i, o, u, y, A, E, I, O, U, Y, a, A, c, C]'
) as NEW_MISMATCH_VAL_NO_ACCENTS,
Translate(OLD_MISMATCH_VAL,
'[à, è, ì, ò, ù, À, È, Ì, Ò, Ù, á, é, í, ó, ú, ý, Á, É, Í, Ó, Ú, Ý, â, ê, î, ô, û, Â, Ê, Î, Ô, Û, ã, ñ, õ, Ã, Ñ, Õ, ä, ë, ï, ö, ü, ÿ, Ä, Ë, Ï, Ö, Ü, Ÿ, å, Å, ç, Ç]',
'[a, e, i, o, u, A, E, I, O, U, a, e, i, o, u, y, A, E, I, O, U, Y, a, e, i, o, u, A, E, I, O, U, a, n, o, A, N, O, a, e, i, o, u, y, A, E, I, O, U, Y, a, A, c, C]'
) as OLD_MISMATCH_VAL_NO_ACCENTS,
Regexp_replace(
regexp_replace(
LEFT(NEW_MISMATCH_VAL_NO_ACCENTS, len(OLD_MISMATCH_VAL)),
'[^\x00-\x7F]'
),
'[^a-zA-Z0-9]+'
) as NEW_MISMATCH_NO_SPECIAL_CHARACTER,
Regexp_replace(
regexp_replace(
OLD_MISMATCH_VAL_NO_ACCENTS,
'[^\x00-\x7F]'
),
'[^a-zA-Z0-9]+'
) as OLD_MISMATCH_NO_SPECIAL_CHARACTER,
HASH(LEFT(NEW_MISMATCH_VAL, len(OLD_MISMATCH_VAL))) as NEW_MISMATCH_HASH,
HASH(OLD_MISMATCH_VAL) as OLD_MISMATCH_HASH,
HASH(NEW_MISMATCH_NO_SPECIAL_CHARACTER) as NEW_MISMATCH_HASH_NS,
HASH(OLD_MISMATCH_NO_SPECIAL_CHARACTER) as OLD_MISMATCH_HASH_NS,
ascii(RIGHT(NEW_MISMATCH_VAL,1)) as total_NEW_MISMATCH_LAST_C,
ascii(RIGHT(OLD_MISMATCH_VAL,1)) as OLD_MISMATCH_VAL_LAST_C,
ascii(left(NEW_MISMATCH_VAL,1)) as FIRST_C_NEW_MISMATCH,
ascii(leFT(OLD_MISMATCH_VAL,1)) as FIRST_C_OLD_MISMATCH_VAL,
len(NEW_MISMATCH_VAL) as Size_NEW_MISMATCH_VAL,
len(OLD_MISMATCH_VAL) as Size_OLD_MISMATCH_VAL,
Case when NEW_MISMATCH_HASH_NS = OLD_MISMATCH_HASH_NS and Size_NEW_MISMATCH_VAL <> Size_OLD_MISMATCH_VAL then 1 else 0 End as IsTruncating,
Case when NEW_MISMATCH_HASH_NS <> OLD_MISMATCH_HASH_NS then 1 else 0 End as IsNew_record_due_to_timing_of_build,
Case when NEW_MISMATCH_HASH_NS = OLD_MISMATCH_HASH_NS and NEW_MISMATCH_HASH <> OLD_MISMATCH_HASH and not(IsTruncating = 1) then 1 else 0 End as IsCharacter_set_conversion,
Case when NEW_MISMATCH_HASH_NS = OLD_MISMATCH_HASH_NS and Size_NEW_MISMATCH_VAL <> Size_OLD_MISMATCH_VAL then 'Truncating'
when NEW_MISMATCH_HASH_NS <> OLD_MISMATCH_HASH_NS then 'Timing of build'
when NEW_MISMATCH_HASH_NS = OLD_MISMATCH_HASH_NS and NEW_MISMATCH_HASH <> OLD_MISMATCH_HASH and not(IsTruncating = 1) then 'Character set conversion'
else 'Others' End as IssueType,
*
from DEV_AFG.dbt_test__audit.dual_warehouse_test_WC_OPTY_PE_4e280d75455916c5c5824770948fb371 a
--where MISMATCH_REASON = 'Missing in source'
-- and substr(old_mismatch_val, 1, length(new_mismatch_val)) <> new_mismatch_val --<< Change it to the column you want to validate
-- and isNEW_RECORD_DUE_TO_TIMING_OF_BUILD = 1
-- select * from DEV_AFG.dbt_test__audit.dual_warehouse_test_WC_ASSET_P_f9eda9b76a6c2c7ec42df494baee112b
)
--select _fivetran_synced,* from afg.raw_flex_siebel.S_EMPLOYEE_X where row_id in (select "integration_id" from data_source)
select MISMATCH_REASON,
count(MISMATCH_REASON) as Discrepancies,
count_if(IsTruncating=1) as Total_Truncation_in_Oracle_warehouse,
count_if(NEW_MISMATCH_HASH_NS <> OLD_MISMATCH_HASH_NS) as Total_New_record_due_to_timing_of_build,
count_if(NEW_MISMATCH_HASH_NS = OLD_MISMATCH_HASH_NS and NEW_MISMATCH_HASH <> OLD_MISMATCH_HASH and not(IsTruncating = 1)) as Total_character_set_conversion
from data_source
where MISMATCH_REASON NOT IN ('PERSON_WID','OPTY_WID')
group by MISMATCH_REASON
order by 2 desc;