Data validation - special characters - Dual Test - DBT

21/02/2022 18:02
;with data_source
As
(
  
  
    select 
 
            Translate(NEW_MISMATCH_VAL,
                      '[à, è, ì, ò, ù, À, È, Ì, Ò, Ù, á, é, í, ó, ú, ý, Á, É, Í, Ó, Ú, Ý, â, ê, î, ô, û, Â, Ê, Î, Ô, Û, ã, ñ, õ, Ã, Ñ, Õ, ä, ë, ï, ö, ü, ÿ, Ä, Ë, Ï, Ö, Ü, Ÿ, å, Å, ç, Ç]',
                      '[a, e, i, o, u, A, E, I, O, U, a, e, i, o, u, y, A, E, I, O, U, Y, a, e, i, o, u, A, E, I, O, U, a, n, o, A, N, O, a, e, i, o, u, y, A, E, I, O, U, Y, a, A, c, C]'
                     ) as NEW_MISMATCH_VAL_NO_ACCENTS,
            Translate(OLD_MISMATCH_VAL,
                      '[à, è, ì, ò, ù, À, È, Ì, Ò, Ù, á, é, í, ó, ú, ý, Á, É, Í, Ó, Ú, Ý, â, ê, î, ô, û, Â, Ê, Î, Ô, Û, ã, ñ, õ, Ã, Ñ, Õ, ä, ë, ï, ö, ü, ÿ, Ä, Ë, Ï, Ö, Ü, Ÿ, å, Å, ç, Ç]',
                      '[a, e, i, o, u, A, E, I, O, U, a, e, i, o, u, y, A, E, I, O, U, Y, a, e, i, o, u, A, E, I, O, U, a, n, o, A, N, O, a, e, i, o, u, y, A, E, I, O, U, Y, a, A, c, C]'
                     ) as OLD_MISMATCH_VAL_NO_ACCENTS,
            Regexp_replace(
                            regexp_replace(
                                            LEFT(NEW_MISMATCH_VAL_NO_ACCENTS, len(OLD_MISMATCH_VAL)), 
                                            '[^\x00-\x7F]'
                                           ),
                                            '[^a-zA-Z0-9]+'
                          ) as NEW_MISMATCH_NO_SPECIAL_CHARACTER,
             Regexp_replace(
                            regexp_replace(
                                            OLD_MISMATCH_VAL_NO_ACCENTS, 
                                            '[^\x00-\x7F]'
                                           ),
                                            '[^a-zA-Z0-9]+'
                          ) as OLD_MISMATCH_NO_SPECIAL_CHARACTER,
            HASH(LEFT(NEW_MISMATCH_VAL, len(OLD_MISMATCH_VAL))) as NEW_MISMATCH_HASH,
            HASH(OLD_MISMATCH_VAL) as OLD_MISMATCH_HASH,          
            HASH(NEW_MISMATCH_NO_SPECIAL_CHARACTER) as NEW_MISMATCH_HASH_NS,
            HASH(OLD_MISMATCH_NO_SPECIAL_CHARACTER) as OLD_MISMATCH_HASH_NS,
            ascii(RIGHT(NEW_MISMATCH_VAL,1)) as total_NEW_MISMATCH_LAST_C,  
            ascii(RIGHT(OLD_MISMATCH_VAL,1)) as OLD_MISMATCH_VAL_LAST_C,
            ascii(left(NEW_MISMATCH_VAL,1)) as FIRST_C_NEW_MISMATCH,  
            ascii(leFT(OLD_MISMATCH_VAL,1)) as FIRST_C_OLD_MISMATCH_VAL, 
            len(NEW_MISMATCH_VAL) as Size_NEW_MISMATCH_VAL,  
            len(OLD_MISMATCH_VAL) as Size_OLD_MISMATCH_VAL, 
            Case when NEW_MISMATCH_HASH_NS = OLD_MISMATCH_HASH_NS and Size_NEW_MISMATCH_VAL <> Size_OLD_MISMATCH_VAL then 1 else 0 End as IsTruncating,
            Case when NEW_MISMATCH_HASH_NS <> OLD_MISMATCH_HASH_NS then 1 else 0 End as IsNew_record_due_to_timing_of_build,
            Case when NEW_MISMATCH_HASH_NS = OLD_MISMATCH_HASH_NS and NEW_MISMATCH_HASH <> OLD_MISMATCH_HASH and not(IsTruncating = 1) then 1 else 0 End as IsCharacter_set_conversion,
            Case when NEW_MISMATCH_HASH_NS = OLD_MISMATCH_HASH_NS and Size_NEW_MISMATCH_VAL <> Size_OLD_MISMATCH_VAL then 'Truncating'
                 when NEW_MISMATCH_HASH_NS <> OLD_MISMATCH_HASH_NS then 'Timing of build'
                 when NEW_MISMATCH_HASH_NS = OLD_MISMATCH_HASH_NS and NEW_MISMATCH_HASH <> OLD_MISMATCH_HASH and not(IsTruncating = 1) then 'Character set conversion' 
                 else 'Others' End as IssueType,
            * 
    from DEV_AFG.dbt_test__audit.dual_warehouse_test_WC_OPTY_PE_4e280d75455916c5c5824770948fb371  a
    --where MISMATCH_REASON = 'Missing in source' 
          -- and substr(old_mismatch_val, 1,  length(new_mismatch_val)) <> new_mismatch_val --<< Change it to the column you want to validate
          -- and isNEW_RECORD_DUE_TO_TIMING_OF_BUILD = 1
        
 -- select * from DEV_AFG.dbt_test__audit.dual_warehouse_test_WC_ASSET_P_f9eda9b76a6c2c7ec42df494baee112b 
)
--select _fivetran_synced,* from afg.raw_flex_siebel.S_EMPLOYEE_X where row_id in (select "integration_id" from data_source)
 
 
select  MISMATCH_REASON,  
        count(MISMATCH_REASON) as Discrepancies,
        count_if(IsTruncating=1) as Total_Truncation_in_Oracle_warehouse,
        count_if(NEW_MISMATCH_HASH_NS <> OLD_MISMATCH_HASH_NS) as Total_New_record_due_to_timing_of_build,       
        count_if(NEW_MISMATCH_HASH_NS = OLD_MISMATCH_HASH_NS and NEW_MISMATCH_HASH <> OLD_MISMATCH_HASH and not(IsTruncating = 1)) as Total_character_set_conversion
from data_source 
where  MISMATCH_REASON NOT IN ('PERSON_WID','OPTY_WID')
group by MISMATCH_REASON 
order by 2 desc;
;with data_source
As
(
  
  
    select 
 
            Translate(NEW_MISMATCH_VAL,
                      '[à, è, ì, ò, ù, À, È, Ì, Ò, Ù, á, é, í, ó, ú, ý, Á, É, Í, Ó, Ú, Ý, â, ê, î, ô, û, Â, Ê, Î, Ô, Û, ã, ñ, õ, Ã, Ñ, Õ, ä, ë, ï, ö, ü, ÿ, Ä, Ë, Ï, Ö, Ü, Ÿ, å, Å, ç, Ç]',
                      '[a, e, i, o, u, A, E, I, O, U, a, e, i, o, u, y, A, E, I, O, U, Y, a, e, i, o, u, A, E, I, O, U, a, n, o, A, N, O, a, e, i, o, u, y, A, E, I, O, U, Y, a, A, c, C]'
                     ) as NEW_MISMATCH_VAL_NO_ACCENTS,
            Translate(OLD_MISMATCH_VAL,
                      '[à, è, ì, ò, ù, À, È, Ì, Ò, Ù, á, é, í, ó, ú, ý, Á, É, Í, Ó, Ú, Ý, â, ê, î, ô, û, Â, Ê, Î, Ô, Û, ã, ñ, õ, Ã, Ñ, Õ, ä, ë, ï, ö, ü, ÿ, Ä, Ë, Ï, Ö, Ü, Ÿ, å, Å, ç, Ç]',
                      '[a, e, i, o, u, A, E, I, O, U, a, e, i, o, u, y, A, E, I, O, U, Y, a, e, i, o, u, A, E, I, O, U, a, n, o, A, N, O, a, e, i, o, u, y, A, E, I, O, U, Y, a, A, c, C]'
                     ) as OLD_MISMATCH_VAL_NO_ACCENTS,
            Regexp_replace(
                            regexp_replace(
                                            LEFT(NEW_MISMATCH_VAL_NO_ACCENTS, len(OLD_MISMATCH_VAL)), 
                                            '[^\x00-\x7F]'
                                           ),
                                            '[^a-zA-Z0-9]+'
                          ) as NEW_MISMATCH_NO_SPECIAL_CHARACTER,
             Regexp_replace(
                            regexp_replace(
                                            OLD_MISMATCH_VAL_NO_ACCENTS, 
                                            '[^\x00-\x7F]'
                                           ),
                                            '[^a-zA-Z0-9]+'
                          ) as OLD_MISMATCH_NO_SPECIAL_CHARACTER,
            HASH(LEFT(NEW_MISMATCH_VAL, len(OLD_MISMATCH_VAL))) as NEW_MISMATCH_HASH,
            HASH(OLD_MISMATCH_VAL) as OLD_MISMATCH_HASH,          
            HASH(NEW_MISMATCH_NO_SPECIAL_CHARACTER) as NEW_MISMATCH_HASH_NS,
            HASH(OLD_MISMATCH_NO_SPECIAL_CHARACTER) as OLD_MISMATCH_HASH_NS,
            ascii(RIGHT(NEW_MISMATCH_VAL,1)) as total_NEW_MISMATCH_LAST_C,  
            ascii(RIGHT(OLD_MISMATCH_VAL,1)) as OLD_MISMATCH_VAL_LAST_C,
            ascii(left(NEW_MISMATCH_VAL,1)) as FIRST_C_NEW_MISMATCH,  
            ascii(leFT(OLD_MISMATCH_VAL,1)) as FIRST_C_OLD_MISMATCH_VAL, 
            len(NEW_MISMATCH_VAL) as Size_NEW_MISMATCH_VAL,  
            len(OLD_MISMATCH_VAL) as Size_OLD_MISMATCH_VAL, 
            Case when NEW_MISMATCH_HASH_NS = OLD_MISMATCH_HASH_NS and Size_NEW_MISMATCH_VAL <> Size_OLD_MISMATCH_VAL then 1 else 0 End as IsTruncating,
            Case when NEW_MISMATCH_HASH_NS <> OLD_MISMATCH_HASH_NS then 1 else 0 End as IsNew_record_due_to_timing_of_build,
            Case when NEW_MISMATCH_HASH_NS = OLD_MISMATCH_HASH_NS and NEW_MISMATCH_HASH <> OLD_MISMATCH_HASH and not(IsTruncating = 1) then 1 else 0 End as IsCharacter_set_conversion,
            Case when NEW_MISMATCH_HASH_NS = OLD_MISMATCH_HASH_NS and Size_NEW_MISMATCH_VAL <> Size_OLD_MISMATCH_VAL then 'Truncating'
                 when NEW_MISMATCH_HASH_NS <> OLD_MISMATCH_HASH_NS then 'Timing of build'
                 when NEW_MISMATCH_HASH_NS = OLD_MISMATCH_HASH_NS and NEW_MISMATCH_HASH <> OLD_MISMATCH_HASH and not(IsTruncating = 1) then 'Character set conversion' 
                 else 'Others' End as IssueType,
            * 
    from DEV_AFG.dbt_test__audit.dual_warehouse_test_WC_OPTY_PE_4e280d75455916c5c5824770948fb371  a
    --where MISMATCH_REASON = 'Missing in source' 
          -- and substr(old_mismatch_val, 1,  length(new_mismatch_val)) <> new_mismatch_val --<< Change it to the column you want to validate
          -- and isNEW_RECORD_DUE_TO_TIMING_OF_BUILD = 1
        
 -- select * from DEV_AFG.dbt_test__audit.dual_warehouse_test_WC_ASSET_P_f9eda9b76a6c2c7ec42df494baee112b 
)
--select _fivetran_synced,* from afg.raw_flex_siebel.S_EMPLOYEE_X where row_id in (select "integration_id" from data_source)
 
 
select  MISMATCH_REASON,  
        count(MISMATCH_REASON) as Discrepancies,
        count_if(IsTruncating=1) as Total_Truncation_in_Oracle_warehouse,
        count_if(NEW_MISMATCH_HASH_NS <> OLD_MISMATCH_HASH_NS) as Total_New_record_due_to_timing_of_build,       
        count_if(NEW_MISMATCH_HASH_NS = OLD_MISMATCH_HASH_NS and NEW_MISMATCH_HASH <> OLD_MISMATCH_HASH and not(IsTruncating = 1)) as Total_character_set_conversion
from data_source 
where  MISMATCH_REASON NOT IN ('PERSON_WID','OPTY_WID')
group by MISMATCH_REASON 
order by 2 desc;