/**** TIP000417 ****/



%macro program_document;

This macro will take 2 SAS datasets, and run a ONE-WAY Frequency on a list of vars which are common to both datasets, 
and then compare the differences in the frequencies of these variables, and produce a proc print of the distributions 
with their differences.

Sample Call:
***%procfreq_compare(base=old,  compare=new ,_vars_= var1 var2 var3 var4, format=7.1, cleanup=0);
            
parameters:
BASE    [REQUIRED] - name of BASE    sas dataset with the lib reference.
COMPARE [REQUIRED] - name of COMPARE sas dataset with the lib reference.
_VARS_  [REQUIRED] - a list of SAS variables which is common to both BASE and COMPARE sasdatasets, separated by blanks.
XWHERE  [OPTIONAL] - a valid WHERE clause to subset both BASE and COMPARE datasets.
WEIGHT  [OPTIONAL] - a sas variable common to both BASE and COMPARE datasets to weight the ONE-WAY Frequencies.
FORMAT  [OPTIONAL] - a valid format that can be applied to all the variables listed in _VARS_.  Default is best.2 for numeric variables.
CLEANUP [OPTIONAL] - 1 will cleanup all the work files not needed after comparison is completed.  Anything other value will keep work files around.

Notes:
1.  The SAS Lib References need to be issued before calling this macro.
2.  Do not mix character and numeric variables in the same list of _VARS_.
3.  Need to choose a format that is appropriate for all variables listed in _VARS_.

%mend program_document;

%macro get_Var_Type(inputds=, var=);
%local dsid varnum vartype rc;
%let dsid = %sysfunc(open(&inputds));
  %let varnum = %sysfunc(varnum(&dsid,&var));
  %let vartype = %left(%sysfunc(vartype(&dsid,&varnum)));&vartype
  %let rc = %sysfunc(close(&dsid));
%mend get_Var_Type;

%macro procfreq_compare ( base=, compare=, _vars_=, xwhere=, weight=, format=best.2, cleanup=1);

%if %length(&_vars_) gt 0 %then %do;
%let _vars_ = %cmpres(&_vars_);
%global howmany_vars;
  data _null_;
   howmany = 1 + (length(compbl("&_vars_")) - length(compress("&_vars_")));
   call symputx('howmany_vars', put(howmany,3.));
  run;
%end;

/***  FREQ on BASE or COMPARE Table ***/

  %macro run_freq(Base_Compare=BASE);
    proc freq data=&&&Base_Compare;
    %if %length(&xwhere) %then %do;
     where &xwhere;
     title2 "Where Clause = &xwhere";
    %end;
    
     %do _i_ = 1 %to &howmany_vars;
       table %Scan(&_vars_, &_i_,%STR( )) / missing outpct noprint out=&Base_Compare.&_i_;
       
       format %sysfunc( translate( %Scan(&_vars_, &_i_,%STR( )), ' ', '*') ) &format ;
     %end;
     %if %length(&weight) gt 0 %then %do;
      weight &weight;
     %end;
  %mend run_freq;

 %run_freq(BASE_COMPARE=BASE);
 %run_freq(BASE_COMPARE=COMPARE);

 %macro Join_Base_Compare;
    %do _i_ = 1 %to &howmany_vars;
    
          %let var_type = %get_Var_Type(inputds=base&_i_, var=%Scan(&_vars_, &_i_,%STR( )) );
          
	  proc sort data=base&_i_    out=base&_i_   ; by %Scan(&_vars_, &_i_,%STR( )); run;
	  proc sort data=compare&_i_ out=compare&_i_; by %Scan(&_vars_, &_i_,%STR( )); run;

	  data base&_i_;
	   set base&_i_;
	   %if &var_type = N %then %do;
               %Scan(&_vars_, &_i_,%STR( )) = input( put( %Scan(&_vars_, &_i_,%STR( )), &format), best.);
           %end;
           %else %do;
               %Scan(&_vars_, &_i_,%STR( )) = input( put( %Scan(&_vars_, &_i_,%STR( )), &format), &format);
           %end;
	  run;

	  data compare&_i_;
	   set compare&_i_;
	   %if &var_type = N %then %do;
               %Scan(&_vars_, &_i_,%STR( )) = input( put( %Scan(&_vars_, &_i_,%STR( )), &format), best.);
           %end;
           %else %do;
               %Scan(&_vars_, &_i_,%STR( )) = input( put( %Scan(&_vars_, &_i_,%STR( )), &format), &format);
           %end;
	  run;

	  data Diff&_i_;
	    length var_name $32.;
		label  var_values = "%Scan(&_vars_, &_i_,%STR( ))";
	    merge
	          base&_i_    (rename= ( count=base_cnt    percent=base_pct   ) )
		      compare&_i_ (rename= ( count=compare_cnt percent=compare_pct) )
			  ;

	    by %Scan(&_vars_, &_i_,%STR( ));

		var_name   = "%Scan(&_vars_, &_i_,%STR( ))"; 
		var_values =  %Scan(&_vars_, &_i_,%STR( ));

		label 
              base_cnt    = "Base Count"
			  base_pct    = "Base Percent"
			  compare_cnt = "Compare Count"
			  compare_pct = "Compare Percent"
			  diff_cnt    = "Difference Count"
			  diff_pct    = "Difference Percent"
			  ;

		if base_cnt    = . then base_cnt    = 0;
		if compare_cnt = . then compare_cnt = 0;
		diff_cnt       = compare_cnt - base_cnt;

		if base_pct    = . then base_pct    = 0;
		if compare_pct = . then compare_pct = 0;
		diff_pct       = compare_pct - base_pct;
		drop %Scan(&_vars_, &_i_,%STR( ));
           run;
	%end;

	/*** now join all the difference files into a single file 
	     delete _diff_ if it exists ***/
	proc datasets library=work nolist; delete _diff_; quit;
	%do _i_ = 1 %to &howmany_vars;
	  proc append base=_diff_  data=diff&_i_; run;
	 
  	 %if &cleanup = 1 %then %do;
  	  /*** delete the individual base and compare files to clean up ***/
  	  proc datasets library=work nolist; delete base&_i_ compare&_i_; quit;
  	 %end;
	%end;

	proc sort data=_diff_ out=_diff_; by var_name; run;

	title1 "Base File = &base";
	title2 "Compare File = &compare";

	proc print data=_diff_;
	by var_name;
	id var_name;
	pageby var_name;
	run;
%mend Join_Base_Compare;

%Join_Base_Compare;

run;
%mend procfreq_compare;

%macro samples;
  libname mylib "C:\DAD\sas_stuff\Proc_Freq_Compare";
  title3 "Compare Year to Year";
  %procfreq_compare(   base=mylib.sasalary_2000 , 
                    compare=mylib.sasalary_2001 ,
                     _vars_= Gender, 
                     format=$6.,
                    cleanup=0);

 proc format;
  value birthyr
   low- 1949 = '1950' 1950-1959 = '1960' 1960-1969 = '1970' 1970-1979 = '1980'
   1980-1989 = '1990' 1990-1999 = '2000' 2000-2009 = '2010'
   ;
   
  value yearsexp
   low-1  = '01'
   1-2    = '02'
   2-3    = '03'
   3-4    = '04'
   5-10   = '10'
   10-20  = '20'
   20-high= '21';
  run;

  libname mylib "C:\DAD\sas_stuff\Proc_Freq_Compare";
  title3 "Compare Year to Year";

  %procfreq_compare(   base=mylib.sasalary_2000 , 
                    compare=mylib.sasalary_2001 ,
                     _vars_= birthyr, 
                     format=birthyr5.,
                    cleanup=0);
    
    
               
  %procfreq_compare(   base=mylib.sasalary_2000 , 
                    compare=mylib.sasalary_2001 ,
                     _vars_= yearsexp, 
                     format=yearsexp3.,
		    cleanup=0);
               
                    
  %procfreq_compare(   base=mylib.sasalary_2000 , 
                    compare=mylib.sasalary_2001 ,
                     _vars_= educate, 
                     format=$10.,
                    cleanup=0);
                    
                    
  %procfreq_compare(   base=mylib.sasalary_2000 , 
                    compare=mylib.sasalary_2001 ,
                     _vars_= educate gender industry , 
                     format=$15.,
                    cleanup=0);

%mend samples;

/*** end of tip 00417 ***/