%macro check_dataset(dset =, obs_lim = max, eldest_age = 89) ; %local i ; %local inset_name ; %let inset_name = &dset ; %if %lowcase(&obs_lim) = max %then %do ; %** Nothing ; %end ; %else %do ; proc surveyselect data = &inset_name out = __sub_dset method = srs sampsize = &obs_lim SELECTALL seed = 1234567 noprint ; run; %let dset = __sub_dset ; %end ; %macro check_varname(regx, msg) ; create table possible_bad_vars as select name, label from these_vars where prxmatch(compress("/(®x)/i"), name) ; %if &sqlobs > 0 %then %do ; insert into phi_warnings(dset, variable, label, warning) select "&inset_name" as dset, name, label, "&msg" from possible_bad_vars ; %end ; %mend check_varname ; %macro check_vars_for_mrn(length_limit = 6, obs_lim = max) ; %local char ; %let char = 2 ; proc sql noprint ; select name into :mrn_array separated by ' ' from these_vars where type = &char and length ge &length_limit ; quit ; %if &sqlobs > 0 %then %do ; %put Checking these vars for possible MRN contents: &mrn_array ; data __gnu ; retain mrn_regex_handle badcount ; set &inset_name (obs = &obs_lim keep = &mrn_array) ; if _n_ = 1 then do ; mrn_regex_handle = prxparse("/&mrn_regex/") ; badcount = 0 ; end ; array p &mrn_array ; do i = 1 to dim(p) ; if prxmatch(mrn_regex_handle, p{i}) then do ; badvar = vname(p{i}) ; badvalue = p{i} ; badcount = _n_ ; output ; end ; keep badvar badvalue badcount ; end ; run ; proc sql noprint ; select compress(put(max(badcount), best.)) into :badcount from __gnu ; insert into phi_warnings(dset, variable, warning) select distinct "&inset_name", badvar, "Could this var hold MRN values? Contents of %trim(&badcount) records match the pattern given for MRN values. MRNs should never move across sites." from __gnu ; drop table __gnu ; quit ; %end ; %mend check_vars_for_mrn ; %macro check_vars_for_oldsters(eldest_age = 89, obs_lim = max) ; %local dtfmts ; %let dtfmts = 'B8601DA','B8601DN','B8601DT','B8601DZ','B8601LZ','B8601TM','B8601TZ','DATE','DATEAMPM','DATETIME','DAY','DDMMYY', 'DDMMYYB','DDMMYYC','DDMMYYD','DDMMYYN','DDMMYYP','DDMMYYS','DOWNAME','DTDATE','DTMONYY','DTWKDATX','DTYEAR', 'DTYYQC','E8601DA','E8601DN','E8601DT','E8601DZ','E8601LZ','E8601TM','E8601TZ','HHMM','HOUR','JULDAY','JULIAN', 'MMDDYY','MMDDYYB','MMDDYYC','MMDDYYD','MMDDYYN','MMDDYYP','MMDDYYS','MMSS','MMYY','MMYY','MONNAME','MONTH','MONYY', 'PDJULG','PDJULI','QTR','QTRR','WEEKDATE','WEEKDATX','WEEKDAY','WEEKU','WEEKV','WEEKW','WORDDATE','WORDDATX', 'YEAR','YYMM','YYMMC','YYMMD','YYMMN','YYMMP','YYMMS','YYMMDD','YYMMDDB','YYMMDDC','YYMMDDD','YYMMDDN','YYMMDDP', 'YYMMDDS','YYMON','YYQ','YYQC','YYQD','YYQN','YYQP','YYQS','YYQR','YYQRC','YYQRD','YYQRN','YYQRP','YYQRS' ; %local num ; %let num = 1 ; proc sql noprint ; select name into :dat_array separated by ' ' from these_vars where type = &num and (format in (&dtfmts) or lowcase(name) like '%date%') ; /* added by cb to shorten the process of looking at all dates */ %if &sqlobs > 0 %then %do ; %put Checking these vars for possible DOB contents: &dat_array ; select 'min(' || trim(name) || ') as ' || name into :var_list separated by ',' from these_vars where type = &num and (format in (&dtfmts) or lowcase(name) like '%date%') ; create table __gnu as select &var_list from &inset_name ; /* end cb additions */ quit ; data __gnu ; set __gnu (obs = &obs_lim keep = &dat_array) ; array d &dat_array ; do i = 1 to dim(d) ; if n(d{i}) then maybe_age = %calcage(bdtvar = d{i}, refdate = "&sysdate9."d) ; if maybe_age ge &eldest_age then do ; badvar = vname(d{i}) ; badvalue = d{i} ; output ; end ; keep badvar badvalue maybe_age ; end ; run ; proc sql outobs = 30 nowarn ; insert into phi_warnings(dset, variable, warning) select distinct "&inset_name", badvar, "If this is a date, at least one value is " || compress(put(maybe_age, best.)) || " years ago, which is older than &eldest_age.. " || "If this date applies to a person, the record is probably PHI." from __gnu ; drop table __gnu ; quit ; %end ; %else %do ; %put No obvious date variables found in &inset_name.--skipping age checks. ; %end ; %mend check_vars_for_oldsters ; proc contents noprint data = &inset_name out = these_vars ; run ; proc sql noprint ; create table phi_warnings (dset char(50), variable char(256), label char(256), warning char(200)) ; %check_varname(regx = mrn|hrn , msg = %str(Name suggests this var may be an MRN, which should never move across sites.)) ; %check_varname(regx = birth_date|BirthDate|DOB|BDate , msg = %str(Name suggests this var may be a date of birth.)) ; %check_varname(regx = SSN|SocialSecurityNumber|social_security_number|socsec, msg = %str(Name suggests this var may be a social security number.)) ; %if %symexist(locally_forbidden_varnames) %then %do ; %check_varname(regx = &locally_forbidden_varnames, msg = %str(May be on the locally defined list of variables not allowed to be sent to other sites.)) ; %end ; quit ; %check_vars_for_mrn(obs_lim = &obs_lim) ; %check_vars_for_oldsters(obs_lim = &obs_lim, eldest_age = &eldest_age) ; title3 "WARNINGS for dataset &inset_name:" ; proc sql noprint ; select count(*) as num_warns into :num_warns from phi_warnings ; %if &num_warns = 0 %then %do ; reset print outobs = 5 NOWARN ; select "No obvious PHI-like data elements in &inset_name--BUT PLEASE INSPECT THE CONTENTS AND PRINTs TO FOLLOW" as x label = "No warnings for &inset_name" from &inset_name ; %do i = 1 %to 5 ; %put No obvious phi-like data elements in &inset_name. BUT PLEASE INSPECT THE CONTENTS AND PRINTs CAREFULLY TO MAKE SURE OF THIS! ; %end ; %end ; %else %do ; reset print ; select variable, warning from phi_warnings order by variable, warning ; quit ; %end ; title3 "Dataset &inset_name" ; proc contents data = &inset_name varnum ; run ; /* proc print data = &inset_name (obs = 20) ; run ; */ ** TODO: make the print print out recs that trip the value warnings. ; proc sql number ; select * from &inset_name (obs = 20) ; quit ; quit ; %RemoveDset(dset = __sub_dset) ; %RemoveDset(dset = possible_bad_vars) ; %RemoveDset(dset = phi_warnings) ; %RemoveDset(dset = these_vars) ; %mend check_dataset ; %macro detect_phi(transfer_lib, obs_lim = max, eldest_age = 89) ; %put ; %put ; %put ============================================================== ; %put ; %put Macro detect_phi: ; %put ; %put Checking all datasets found in %sysfunc(pathname(&transfer_lib)) for the following signs of PHI: ; %put - Variable names signifying sensitive items like 'MRN', 'birth_date', 'SSN' and so forth. ; %if %symexist(locally_forbidden_varnames) %then %do ; %put - Variable names on the list defined in the standard macro variable locally_forbidden_varnames (here those names are: &locally_forbidden_varnames). ; %end ; %put - Contents of CHARACTER variables that match the pattern given in the standard macro variable mrn_regex (here that var is &mrn_regex) ; %put Please note that numeric variables ARE NOT CHECKED FOR MRN-LIKE CONTENT. ; %put - The contents of date variables (as divined by their formats) for values that, if they were DOBs, would indicate a person older than &eldest_age years. ; %put ; %put THIS IS BETA SOFTWARE-PLEASE SCRUTINIZE THE RESULTS AND REPORT PROBLEMS TO pardee.r@ghc.org. ; %put ; %put THIS MACRO IS NOT A SUBSTITUTE FOR HUMAN INSPECTION AND THOUGHT--PLEASE CAREFULLY INSPECT ALL VARIABLES--WHETHER ; %put OR NOT THEY TRIP A WARNING--TO MAKE SURE THE DATA COMPORTS WITH YOUR DATA SHARING AGREEMENT!!! ; %put THIS MACRO IS NOT A SUBSTITUTE FOR HUMAN INSPECTION AND THOUGHT--PLEASE CAREFULLY INSPECT ALL VARIABLES--WHETHER ; %put OR NOT THEY TRIP A WARNING--TO MAKE SURE THE DATA COMPORTS WITH YOUR DATA SHARING AGREEMENT!!! ; %put ; %put THIS MACRO IS NOT A SUBSTITUTE FOR HUMAN INSPECTION AND THOUGHT--PLEASE CAREFULLY INSPECT ALL VARIABLES--WHETHER ; %put OR NOT THEY TRIP A WARNING--TO MAKE SURE THE DATA COMPORTS WITH YOUR DATA SHARING AGREEMENT!!! ; %put THIS MACRO IS NOT A SUBSTITUTE FOR HUMAN INSPECTION AND THOUGHT--PLEASE CAREFULLY INSPECT ALL VARIABLES--WHETHER ; %put OR NOT THEY TRIP A WARNING--TO MAKE SURE THE DATA COMPORTS WITH YOUR DATA SHARING AGREEMENT!!! ; %put ; %put THIS MACRO IS NOT A SUBSTITUTE FOR HUMAN INSPECTION AND THOUGHT--PLEASE CAREFULLY INSPECT ALL VARIABLES--WHETHER ; %put OR NOT THEY TRIP A WARNING--TO MAKE SURE THE DATA COMPORTS WITH YOUR DATA SHARING AGREEMENT!!! ; %put THIS MACRO IS NOT A SUBSTITUTE FOR HUMAN INSPECTION AND THOUGHT--PLEASE CAREFULLY INSPECT ALL VARIABLES--WHETHER ; %put OR NOT THEY TRIP A WARNING--TO MAKE SURE THE DATA COMPORTS WITH YOUR DATA SHARING AGREEMENT!!! ; %put ; %put THIS MACRO IS NOT A SUBSTITUTE FOR HUMAN INSPECTION AND THOUGHT--PLEASE CAREFULLY INSPECT ALL VARIABLES--WHETHER ; %put OR NOT THEY TRIP A WARNING--TO MAKE SURE THE DATA COMPORTS WITH YOUR DATA SHARING AGREEMENT!!! ; %put THIS MACRO IS NOT A SUBSTITUTE FOR HUMAN INSPECTION AND THOUGHT--PLEASE CAREFULLY INSPECT ALL VARIABLES--WHETHER ; %put OR NOT THEY TRIP A WARNING--TO MAKE SURE THE DATA COMPORTS WITH YOUR DATA SHARING AGREEMENT!!! ; %put ; %put ; %put ============================================================== ; %put ; %put ; title1 "PHI-Detection Report for the datasets in %sysfunc(pathname(&transfer_lib))." ; title2 "please inspect all output carefully to make sure it comports with your data sharing agreement!!!" ; proc sql noprint ; ** describe table dictionary.tables ; select trim(libname) || '.' || memname as dset into :d1-:d999 from dictionary.tables where libname = "%upcase(&transfer_lib)" AND memtype = 'DATA' ; %local num_dsets ; %let num_dsets = &sqlobs ; quit ; %local i ; %if &num_dsets = 0 %then %do i = 1 %to 10 ; %put ERROR: NO DATASETS FOUND IN &transfer_lib!!!! ; %end ; %do i = 1 %to &num_dsets ; %put about to check &&d&i ; %check_dataset(dset = &&d&i, obs_lim = &obs_lim, eldest_age = &eldest_age) ; %end ; %mend detect_phi ;