tip00412

**************************************************************************************** ;
*********     extract_top_bottom_freq.sas             ********************************** ;
**************************************************************************************** ;
/***  
The purpose of this macro is to select a sample of those keys within a SAS dataset that have 
the most number of frequencies (top) and the least number of frequencies (bottom) to create 
a representative extract of the master file (_sasdsnin_) and save them to a sample dataset 
(_sasdsnout_) 

It is strongly recommended the _sasdsnout_ dataset is deleted before this macro is 
executed to ensure all variables are processed as expected due to use of proc append. 

1st parameter _sasdsnin_ =name of sas dataset to draw sample from   (ex: r2.trv0200) 
2nd parameter _sasdsnout_=name of sas dataset to put  sample into   (ex: r2.trv0200_sample) 
3rd parameter _top_      =How many of top keys to keep              (ex: 10) 
4th parameter _bottom_   =How many of bottom keys to keep           (ex: 15) 
5th parameter _key_      =What is KEY variable to base selection on (ex:claim_number);
***/

%macro extract_top_bottom_freq( _sasdsnin_=, _sasdsnout_=, _top_=, _bottom_=, _key_=);
/*** do a frequency of _key_ variable first ***/
proc freq data=&_sasdsnin_ order=freq ;
 table &_key_ / missing noprint out=_extract_ (keep=&_key_ count);
run;
/*** get only the _top_ frequencies number of _key_ ***/
 data extract_top;
  set _extract_ (keep=&_key_ obs=&_top_);
 run;

 /*** now extract those _top_ _key_ records from _sasdsnin_ ***/
 proc sql;
  create table extract_top_sample as
   select *
   from &_sasdsnin_ m,
           extract_top s
   where m.&_key_ eq s.&_key_;
 quit;

 /*** go back and get _bottom_ frequencies of _key_ records ***/
 proc sort data=_extract_ out=_extract_; by count; run;

 /*** keep only the _bottom_ _key_ records ***/
 data extract_bottom;
  set _extract_ (keep=&_key_ obs=&_bottom_);
 run;

 /*** now extract those _bottom_ _key_ records from _sasdsnin_ ***/
 proc sql;
  create table extract_bottom_sample as
   select *
   from &_sasdsnin_ m,
           extract_bottom s
   where m.&_key_ eq s.&_key_;
 quit;

 /*** now join _top_ and _bottom_ records into a single SAS dataset _sasdsnout_ ***/
proc append base=&_sasdsnout_ data=extract_top_sample   ; quit;
proc append base=&_sasdsnout_ data=extract_bottom_sample; quit;

/*** clean up the temporary files ***/
proc datasets library=work nolist; delete _extract_ extract_top        extract_bottom 
                                                    extract_top_sample extract_bottom_sample; quit;

%mend  extract_top_bottom_freq;

/*** sample run and call to extract_top_bottom_freq macro 
proc datasets library=r2 nolist; delete trv0200_sample ; run;

%extract_top_bottom_freq( _sasdsnin_=r2.trv0200, 
                          _sasdsnout_=r2.trv0200_sample, 
                          _top_=15, 
                          _bottom_=15, 
                          _key_=claim_number);
***/
/*** end of extract_top_bottom_freq.sas - tip 00412***/