RUN;
** START THE MATCHING PROCESS;
** MATCH AGENT RECORD ( NAME2 ) AGAINST HSB PRODUCER STREET ADDRESS;
** STEP PURPOSE - FOR EVERY NOMATCH1 RECORD, COMPARE THE WORDS OF;
** TARNAME2 WITH SCRSTR1 OF EACH HSB RECORD.  PAIRS WITH AT LEAST;
** ONE WORD HIT GO TO SAVE3.  NOMATCH1 RECORDS WITH NO HIT ON ANY;
** HSB RECORD GO TO NOMATCH2;
DATA NOMATCH2 SAVE3;
** OUTER LOOP - READ NOMATCH1 DIRECTLY BY OBSERVATION NUMBER P;
DO P = 1 TO TAR;
SET NOMATCH1 NOBS=TAR POINT=P;
IF _ERROR_ = 1 THEN ABORT;
H = 1;
** HIT ACCUMULATES WORD MATCHES OVER ALL HSB RECORDS FOR THIS P;
HIT = 0;
** INNER LOOP - ONE PASS PER HSB RECORD.  THE AGTSTATE INCLUDE IS;
** NOT VISIBLE HERE - PRESUMABLY IT READS HSB RECORD H AND SETS HSB;
** AND THE SCR VARIABLES - TODO CONFIRM;
DO H = 1 TO HSB;
%INCLUDE AGTSTATE;
IF _ERROR_ = 1 THEN ABORT;
CNTMATCH = 0;
CNTZIP = 0;
** CHECK TO SEE IF ZIP CODE IS CLOSE TO HSB ZIP CODE;
** (ONLY THE FIRST THREE CHARACTERS OF EACH ZIP ARE COMPARED);
IF (SUBSTR(TARZIP,1,3)) ne (SUBSTR(SCRZIP,1,3)) THEN GOTO STARTOVR;
CNTZIP = 1;
** FIND A MATCH BASED ON AGENCY 2 NAME;
** PASS 1 - COUNT THE WORDS OF SCRSTR1 (AT MOST 20 ARE EXAMINED);
** SUBTRACT COUNTS THE WORDS FOUND IN THE ABBREVIATION LIST BELOW;
DO WORD = 1 TO 20;
** SUBTRACT IS RESET AT THE FIRST WORD OF EACH COUNTING PASS;
IF WORD = 1 THEN SUBTRACT = 0;
SEARCH = SCAN(SCRSTR1,WORD,' ');
IF SEARCH='ALY' OR SEARCH='ANX' OR SEARCH='ARC'
OR SEARCH='BLVD' OR SEARCH='ST' OR SEARCH='CSWY'
OR SEARCH='CIR' OR SEARCH='XING' OR SEARCH='EXPY'
OR SEARCH='RD' OR SEARCH='AVE' OR SEARCH='DR'
OR SEARCH='PK' OR SEARCH='PKWY' OR SEARCH='HWY'
OR SEARCH='STE' OR SEARCH='SQ' OR SEARCH='RM'
OR SEARCH='CTR' OR SEARCH='CT' OR SEARCH='BOX'
OR SEARCH='DRAWER' OR SEARCH='DRAW' OR SEARCH='MT'
OR SEARCH='PLZ' OR SEARCH='PL' OR SEARCH='TER'
OR SEARCH='TPKE' OR SEARCH='FL'
THEN SUBTRACT + 1 ;
** A BLANK WORD MEANS THE END OF THE STRING - RECORD THE WORD COUNT;
** AND LEAVE THE COUNTING LOOP;
IF ' ' = SCAN(SCRSTR1,WORD,' ') THEN NWORD = WORD - 1;
IF ' ' = SCAN(SCRSTR1,WORD,' ') THEN GOTO DONE1;
END;
** NOTE(REVIEW) - NWORD IS ONLY ASSIGNED WHEN A BLANK WORD IS FOUND,;
** SO WITH 20 OR MORE WORDS IT KEEPS ITS EARLIER VALUE;
DONE1: IF NWORD = . THEN NWORD = 0;
IF SUBTRACT = . THEN SUBTRACT = 0;
** REMOVE THE LISTED ABBREVIATIONS FROM THE WORD COUNT;
NWORD = NWORD + (-1 * SUBTRACT);
** PASS 2 - SAME COUNT FOR TARNAME2, GIVING TWORD.  THIS LIST HAS;
** NO BOX ENTRY, UNLIKE THE SCRSTR1 LIST ABOVE;
DO WORD = 1 TO 20;
IF WORD = 1 THEN SUBTRACT = 0;
SEARCH = SCAN(TARNAME2,WORD,' ');
IF SEARCH='ALY' OR SEARCH='ANX' OR SEARCH='ARC'
OR SEARCH='BLVD' OR SEARCH='ST' OR SEARCH='CSWY'
OR SEARCH='CIR' OR SEARCH='XING' OR SEARCH='EXPY'
OR SEARCH='RD' OR SEARCH='AVE' OR SEARCH='DR'
OR SEARCH='PK' OR SEARCH='PKWY' OR SEARCH='HWY'
OR SEARCH='STE' OR SEARCH='SQ' OR SEARCH='RM'
OR SEARCH='CTR' OR SEARCH='CT'
OR SEARCH='DRAWER' OR SEARCH='DRAW' OR SEARCH='MT'
OR SEARCH='PLZ' OR SEARCH='PL' OR SEARCH='TER'
OR SEARCH='TPKE' OR SEARCH='FL'
THEN SUBTRACT + 1 ;
IF ' ' = SCAN(TARNAME2,WORD,' ') THEN TWORD = WORD - 1;
IF ' ' = SCAN(TARNAME2,WORD,' ') THEN GOTO DONE2;
END;
DONE2: IF TWORD = . THEN TWORD = 0;
IF SUBTRACT = . THEN SUBTRACT = 0;
TWORD = TWORD + (-1 * SUBTRACT);
** MATCH LOOP - TEST WORDS 1 THRU TWORD OF TARNAME2 AGAINST SCRSTR1;
** NOTE(REVIEW) - TWORD IS THE REDUCED COUNT, SO WHEN ABBREVIATIONS;
** WERE SUBTRACTED THE LAST WORDS OF TARNAME2 ARE NOT EXAMINED;
DO WORD = 1 TO TWORD;
SEARCH = SCAN(TARNAME2,WORD,' ');
** BOX AND ONE-CHARACTER WORDS ARE JOINED WITH THE FOLLOWING WORD;
IF TRIM(COMPRESS(SEARCH)) = 'BOX' THEN
SEARCH=TRIM(SEARCH)|| ' ' ||SCAN(TARNAME2,WORD+1,' ');
IF LENGTH(TRIM(SEARCH)) = 1 AND SEARCH ne ' ' THEN
SEARCH=TRIM(SEARCH) ||' '||SCAN(TARNAME2,WORD+1,' ');
** STILL LENGTH 1 (LENGTH ALSO RETURNS 1 FOR A BLANK VALUE) - SKIP;
IF LENGTH(TRIM(SEARCH)) = 1 THEN GOTO ENDNAME1;
** THE ELIMADDR INCLUDE IS NOT VISIBLE HERE.  IT MUST END WITH AN;
** OPEN IF CONDITION BECAUSE THE NEXT LINE SUPPLIES THE THEN CLAUSE;
%INCLUDE ELIMADDR;
THEN GOTO ENDNAME1;
** FIRST TRY - SEARCH WORD APPEARS AS A SUBSTRING OF SCRSTR1;
IF 0 NE INDEX(SCRSTR1,TRIM(SEARCH)) THEN GOTO FOUND;
** SECOND TRY - SAME TEST WITH THE LETTERS AEIOUY REMOVED FROM BOTH;
SQZESRCH= TRIM(COMPRESS(SEARCH,'AEIOUY'));
IF LENGTH(TRIM(SQZESRCH)) le 1 THEN GOTO ENDNAME1;
SQZESCR = TRIM(COMPRESS(SCRSTR1,'AEIOUY'));
IF 0=INDEX(SQZESCR,TRIM(SQZESRCH)) THEN GOTO ENDNAME1;
** CNTMATCH COUNTS HITS FOR THIS PAIR, HIT FOR THE WHOLE P RECORD;
FOUND:
CNTMATCH + 1;
HIT + 1 ;
ENDNAME1: END;
** KEEP THE PAIR WHEN AT LEAST ONE WORD MATCHED, SAVING THE COUNTS;
** UNDER THE SUFFIX 3 NAMES;
IF CNTMATCH gt 0 THEN DO;
MCNT3 = CNTMATCH;
TCNT3 = TWORD;
WCNT3 = NWORD;
OUTPUT SAVE3;
END;
** ON THE LAST HSB RECORD JUMP OUT OF THE H LOOP, ELSE CONTINUE IT;
STARTOVR:IF H ge HSB THEN GOTO GETNEXT; END;
** NO WORD HIT ON ANY HSB RECORD - PASS THE RECORD ON TO NOMATCH2;
GETNEXT: IF H ge HSB THEN H= 1; IF HIT lt 1 THEN OUTPUT NOMATCH2;
IF _ERROR_ = 1 THEN _ERROR_ = 0;
END;
** STOP IS REQUIRED - POINT= ACCESS NEVER REACHES END OF FILE;
STOP;
RUN;
** START THE MATCHING PROCESS;
** MATCH AGENT RECORD ( ADDR ) AGAINST HSB PRODUCER STREET ADDRESS;
** STEP PURPOSE - FOR EVERY NOMATCH2 RECORD, COMPARE THE WORDS OF;
** TARADDR WITH SCRSTR1 OF EACH HSB RECORD.  PAIRS WITH AT LEAST;
** ONE WORD HIT GO TO SAVE4.  NOMATCH2 RECORDS WITH NO HIT ON ANY;
** HSB RECORD GO TO NOMATCH3;
DATA NOMATCH3 SAVE4;
** OUTER LOOP - READ NOMATCH2 DIRECTLY BY OBSERVATION NUMBER P;
DO P = 1 TO TAR;
SET NOMATCH2 NOBS=TAR POINT=P;
IF _ERROR_ = 1 THEN ABORT;
H = 1;
** HIT ACCUMULATES WORD MATCHES OVER ALL HSB RECORDS FOR THIS P;
HIT = 0;
** INNER LOOP - ONE PASS PER HSB RECORD.  THE AGTSTATE INCLUDE IS;
** NOT VISIBLE HERE - PRESUMABLY IT READS HSB RECORD H AND SETS HSB;
** AND THE SCR VARIABLES - TODO CONFIRM;
DO H = 1 TO HSB;
%INCLUDE AGTSTATE;
IF _ERROR_ = 1 THEN ABORT;
CNTMATCH = 0;
CNTZIP = 0;
** CHECK TO SEE IF ZIP CODE IS CLOSE TO HSB ZIP CODE;
** (ONLY THE FIRST THREE CHARACTERS OF EACH ZIP ARE COMPARED);
IF (SUBSTR(TARZIP,1,3)) ne (SUBSTR(SCRZIP,1,3)) THEN GOTO STARTOVR;
CNTZIP = 1;
** FIND A MATCH BASED ON AGENCY ADDRESS;
** PASS 1 - COUNT THE WORDS OF SCRSTR1 (AT MOST 20 ARE EXAMINED);
** SUBTRACT COUNTS THE WORDS FOUND IN THE ABBREVIATION LIST BELOW;
DO WORD = 1 TO 20;
** SUBTRACT IS RESET AT THE FIRST WORD OF EACH COUNTING PASS;
IF WORD = 1 THEN SUBTRACT = 0;
SEARCH = SCAN(SCRSTR1,WORD,' ');
IF SEARCH='ALY' OR SEARCH='ANX' OR SEARCH='ARC'
OR SEARCH='BLVD' OR SEARCH='ST' OR SEARCH='CSWY'
OR SEARCH='CIR' OR SEARCH='XING' OR SEARCH='EXPY'
OR SEARCH='RD' OR SEARCH='AVE' OR SEARCH='DR'
OR SEARCH='PK' OR SEARCH='PKWY' OR SEARCH='HWY'
OR SEARCH='STE' OR SEARCH='SQ' OR SEARCH='RM'
OR SEARCH='CTR' OR SEARCH='CT' OR SEARCH='BOX'
OR SEARCH='DRAWER' OR SEARCH='DRAW' OR SEARCH='MT'
OR SEARCH='PLZ' OR SEARCH='PL' OR SEARCH='TER'
OR SEARCH='TPKE' OR SEARCH='FL'
THEN SUBTRACT + 1 ;
** A BLANK WORD MEANS THE END OF THE STRING - RECORD THE WORD COUNT;
** AND LEAVE THE COUNTING LOOP;
IF ' ' = SCAN(SCRSTR1,WORD,' ') THEN NWORD = WORD - 1;
IF ' ' = SCAN(SCRSTR1,WORD,' ') THEN GOTO DONE1;
END;
** NOTE(REVIEW) - NWORD IS ONLY ASSIGNED WHEN A BLANK WORD IS FOUND,;
** SO WITH 20 OR MORE WORDS IT KEEPS ITS EARLIER VALUE;
DONE1: IF NWORD = . THEN NWORD = 0;
IF SUBTRACT = . THEN SUBTRACT = 0;
** REMOVE THE LISTED ABBREVIATIONS FROM THE WORD COUNT;
NWORD = NWORD + (-1 * SUBTRACT);
** PASS 2 - SAME COUNT FOR TARADDR, GIVING TWORD.  THIS LIST HAS;
** NO BOX ENTRY, UNLIKE THE SCRSTR1 LIST ABOVE;
DO WORD = 1 TO 20;
IF WORD = 1 THEN SUBTRACT = 0;
SEARCH = SCAN(TARADDR,WORD,' ');
IF SEARCH='ALY' OR SEARCH='ANX' OR SEARCH='ARC'
OR SEARCH='BLVD' OR SEARCH='ST' OR SEARCH='CSWY'
OR SEARCH='CIR' OR SEARCH='XING' OR SEARCH='EXPY'
OR SEARCH='RD' OR SEARCH='AVE' OR SEARCH='DR'
OR SEARCH='PK' OR SEARCH='PKWY' OR SEARCH='HWY'
OR SEARCH='STE' OR SEARCH='SQ' OR SEARCH='RM'
OR SEARCH='CTR' OR SEARCH='CT'
OR SEARCH='DRAWER' OR SEARCH='DRAW' OR SEARCH='MT'
OR SEARCH='PLZ' OR SEARCH='PL' OR SEARCH='TER'
OR SEARCH='TPKE' OR SEARCH='FL'
THEN SUBTRACT + 1 ;
IF ' ' = SCAN(TARADDR,WORD,' ') THEN TWORD = WORD - 1;
IF ' ' = SCAN(TARADDR,WORD,' ') THEN GOTO DONE2;
END;
DONE2: IF TWORD = . THEN TWORD = 0;
IF SUBTRACT = . THEN SUBTRACT = 0;
TWORD = TWORD + (-1 * SUBTRACT);
** MATCH LOOP - TEST WORDS 1 THRU TWORD OF TARADDR AGAINST SCRSTR1;
** NOTE(REVIEW) - TWORD IS THE REDUCED COUNT, SO WHEN ABBREVIATIONS;
** WERE SUBTRACTED THE LAST WORDS OF TARADDR ARE NOT EXAMINED;
DO WORD = 1 TO TWORD;
SEARCH = SCAN(TARADDR,WORD,' ');
** BOX AND ONE-CHARACTER WORDS ARE JOINED WITH THE FOLLOWING WORD;
IF TRIM(COMPRESS(SEARCH)) = 'BOX' THEN
SEARCH = TRIM(SEARCH)||' '||SCAN(TARADDR ,WORD+1,' ');
IF LENGTH(TRIM(SEARCH)) = 1 AND SEARCH ne ' ' THEN
SEARCH=TRIM(SEARCH)||' '||SCAN(TARADDR ,WORD+1,' ');
** STILL LENGTH 1 (LENGTH ALSO RETURNS 1 FOR A BLANK VALUE) - SKIP;
IF LENGTH(TRIM(SEARCH)) = 1 THEN GOTO ENDNAME1;
** THE ELIMADDR INCLUDE IS NOT VISIBLE HERE.  IT MUST END WITH AN;
** OPEN IF CONDITION BECAUSE THE NEXT LINE SUPPLIES THE THEN CLAUSE;
%INCLUDE ELIMADDR;
THEN GOTO ENDNAME1;
** FIRST TRY - SEARCH WORD APPEARS AS A SUBSTRING OF SCRSTR1;
IF 0 NE INDEX(SCRSTR1,TRIM(SEARCH)) THEN GOTO FOUND;
** SECOND TRY - SAME TEST WITH THE LETTERS AEIOUY REMOVED FROM BOTH;
SQZESRCH= TRIM(COMPRESS(SEARCH,'AEIOUY'));
IF LENGTH(TRIM(SQZESRCH)) le 1 THEN GOTO ENDNAME1;
SQZESCR = TRIM(COMPRESS(SCRSTR1,'AEIOUY'));
IF 0=INDEX(SQZESCR,TRIM(SQZESRCH)) THEN GOTO ENDNAME1;
** CNTMATCH COUNTS HITS FOR THIS PAIR, HIT FOR THE WHOLE P RECORD;
FOUND:
CNTMATCH + 1;
HIT + 1 ;
ENDNAME1: END;
** KEEP THE PAIR WHEN AT LEAST ONE WORD MATCHED, SAVING THE COUNTS;
** UNDER THE SUFFIX 4 NAMES;
IF CNTMATCH gt 0 THEN DO;
MCNT4 = CNTMATCH;
TCNT4 = TWORD;
WCNT4 = NWORD;
OUTPUT SAVE4;
END;
** ON THE LAST HSB RECORD JUMP OUT OF THE H LOOP, ELSE CONTINUE IT;
STARTOVR:IF H ge HSB THEN GOTO GETNEXT; END;
** NO WORD HIT ON ANY HSB RECORD - PASS THE RECORD ON TO NOMATCH3;
GETNEXT: IF H ge HSB THEN H= 1; IF HIT lt 1 THEN OUTPUT NOMATCH3;
IF _ERROR_ = 1 THEN _ERROR_ = 0;
END;
** STOP IS REQUIRED - POINT= ACCESS NEVER REACHES END OF FILE;
STOP;
RUN;
** COMBINE ALL RECORDS THAT WERE MATCHED ON SOME FIELD;
DATA SAVE;
  ** STACK THE FOUR MATCH RESULT DATA SETS INTO ONE;
  SET SAVE1
      SAVE2
      SAVE3
      SAVE4;
RUN;
** THE INDIVIDUAL RESULT DATA SETS ARE REMOVED FROM WORK;
PROC DATASETS LIBRARY=WORK;
  DELETE SAVE1 SAVE2 SAVE3 SAVE4;
RUN;
QUIT;
** COLLAPSE MATCHED RECORDS BY COMPANY AND AGENT CODE AND HSB CODE;
PROC SUMMARY DATA=SAVE NWAY MISSING;
  CLASS COMPCODE TARCODE SCRCODE;
  VAR MCNT1-MCNT4
      TCNT1-TCNT4
      WCNT1-WCNT4;
  ** SUM= WITH NO NAME LIST REUSES THE ANALYSIS VARIABLE NAMES;
  OUTPUT OUT=SUMMARY SUM=;
RUN;
** ORDER THE MATCHES FOR BY-GROUP PROCESSING (SORTED IN PLACE);
PROC SORT DATA=SAVE;
  BY COMPCODE TARCODE SCRCODE;
RUN;
** GET 1 UNIQUE RECORD PER COMPANY AND AGENT CODE AND HSB CODE;
** THE COUNT VARIABLES ARE NOT WRITTEN TO THE OUTPUT DATA SET;
DATA SAVE (DROP=MCNT1-MCNT4
                TCNT1-TCNT4
                WCNT1-WCNT4
                CNTMATCH TWORD NWORD);
  SET SAVE;
  BY COMPCODE TARCODE SCRCODE;
  IF NOT FIRST.SCRCODE THEN DELETE;
RUN;
** MERGE UNIQUE MATCHED RECORD WITH THOSE RECORDS THAT HAD SOME
** FIELDS THAT WERE SIMILAR;
DATA SAVE;
  MERGE SAVE    (IN=S)
        SUMMARY (IN=SUM);
  BY COMPCODE TARCODE SCRCODE;
  ** TOTAL EACH FAMILY OF COUNTERS (SUFFIXES 1 THRU 4);
  CNTMATCH = SUM(OF MCNT1-MCNT4);
  TWORD    = SUM(OF TCNT1-TCNT4);
  NWORD    = SUM(OF WCNT1-WCNT4);
RUN;
** WITHIN EACH COMPANY AND AGENT, LARGEST CNTMATCH THEN TWORD FIRST;
PROC SORT DATA=SAVE;
  BY COMPCODE TARCODE DESCENDING CNTMATCH DESCENDING TWORD;
RUN;
** SORT AND PRINT THOSE AGENT RECORDS WHERE A MATCH WAS FOUND AND;
** PRINT BASED ON THE NUMBER OF WORDS THAT WERE FOUND AGAINST TOTAL;
** WORDS AVAILABLE TO BE MATCHED AGAINST ;
** THAT IS, HAVE THE MOST LIKELY MATCH PRINT FIRST AND STORE IT ;
** IN THE AGENT MATCH1 FILE ;
DATA _NULL_;
  SET SAVE END=EOF;
  BY COMPCODE TARCODE DESCENDING CNTMATCH DESCENDING TWORD;
  FILE OUTPRT NOPRINT NOTITLES;
  ** TARPHONE IS THE 13 CHARACTERS OF TARPART2 FROM POSITION 41;
  TARPHONE = SUBSTR(TARPART2,41,13);
  ** REPORT HEADING, WRITTEN ONCE;
  IF _N_ = 1 THEN DO;
    PUT
    'AGENCY FILE HAD SOME CRITICAL FIELD IN COMMON WITH HSB AGENCY FILE'
    /;
  END;
  ** BANNER AT THE START OF EACH COMPANY;
  IF FIRST.COMPCODE THEN DO;
    PUT @1 132*'=' /
        @1 ' NEW COMPANY -' COMPCODE 3. /;
  END;
  ** DETAIL BLOCK - AGENT FIELDS FROM COLUMN 1, HSB FIELDS FROM 66;
  PUT
    @1 TARCODE $20. '-' CNTMATCH 2. +2 TWORD 2. +2 NWORD 2.
       '-' SPECIAL $14.
    @66 SCRCODE $7. /
    @1 TARNAME1 $45. @66 SCRNAME $40. /
    @1 TARNAME2 $45. @66 SCRSTR1 $66. /
    @1 TARADDR $45. /
    @1 TARCITY ',' TARSTATE ' ' TARZIP
    @66 SCRCITY ',' SCRSTATE ' ' SCRZIP /
    @1 TARPHONE $13. '-' COMPCODE 3. /
    @1 132*'*' /;
  ** BANNER AT THE END OF EACH COMPANY;
  IF LAST.COMPCODE THEN DO;
    PUT @1 132*'=' /
        @1 ' LAST COMPANY -' COMPCODE 3. ;
  END;
  IF EOF THEN DO;
    PUT // ;
  END;
RUN;
** STORE THE RANKED MATCHES WITH A BLANK MATCH VARIABLE ADDED;
DATA SASMATCH.MATCH;
  SET SAVE;
  MATCH = ' ';
RUN;
** NOW SORT THOSE RECORDS WHERE NO MATCH WAS FOUND (IN PLACE);
PROC SORT DATA=NOMATCH3;
  BY COMPCODE TARCODE;
RUN;
** KEEP THE FIRST RECORD OF EACH COMPANY AND AGENT CODE SO IT CAN;
** LATER BE CHECKED AGAINST THE MATCHED FILE;
DATA NOMATCH3;
  SET NOMATCH3;
  BY COMPCODE TARCODE;
  IF NOT FIRST.TARCODE THEN DELETE;
RUN;
** REDUCE THE MATCHED FILE TO ONE RECORD PER COMPANY AND AGENT CODE;
PROC SORT DATA=SAVE;
  BY COMPCODE TARCODE;
RUN;
DATA SAVE;
  SET SAVE;
  BY COMPCODE TARCODE;
  IF NOT FIRST.TARCODE THEN DELETE;
RUN;
** KEEP ONLY THE UNMATCHED AGENTS THAT ARE ABSENT FROM SAVE;
DATA NOMATCH3;
  MERGE NOMATCH3 (IN=N)
        SAVE     (IN=S);
  BY COMPCODE TARCODE;
  IF S OR NOT N THEN DELETE;
RUN;
** NOW TAKE THOSE AGENT RECORDS THAT HAD NO MATCHING AND PUT AT BOTTOM;
** OF THE AGENT MATCH1 FILE FOR AUDIT PURPOSES OR ADDITIONAL SPECIAL ;
** MATCHING ROUTINE IF AVAILABLE ELSEWHERE ;
DATA _NULL_;
  SET NOMATCH3 END=EOF;
  BY COMPCODE TARCODE;
  ** MOD APPENDS TO OUTPRT INSTEAD OF REPLACING IT;
  FILE OUTPRT NOPRINT NOTITLES MOD;
  ** TARPHONE IS THE 13 CHARACTERS OF TARPART2 FROM POSITION 41;
  TARPHONE = SUBSTR(TARPART2,41,13);
  ** SECTION HEADING, WRITTEN ONCE;
  IF _N_ = 1 THEN DO;
    PUT //
      'FOLLOWING ITEMS WERE UNABLE TO FIND A CORRESPONDING MATCH'
      //;
  END;
  ** BANNER AT THE START OF EACH COMPANY;
  IF FIRST.COMPCODE THEN DO;
    PUT @1 65*'=' /
        @1 ' NEW COMPANY -' COMPCODE 3. /;
  END;
  ** DETAIL BLOCK FOR ONE UNMATCHED AGENT;
  PUT
    @1 TARCODE $20. '-' SPECIAL $14. /
    @1 TARNAME1 $45. /
    @1 TARNAME2 $45. /
    @1 TARADDR $45. /
    @1 TARCITY ',' TARSTATE ' ' TARZIP /
    @1 TARPHONE $13. '-' COMPCODE 3. /
    @1 45*'*' /;
  ** BANNER AT THE END OF EACH COMPANY;
  IF LAST.COMPCODE THEN DO;
    PUT @1 65*'=' /
        @1 'LAST COMPANY -' COMPCODE 3. ;
  END;
RUN;
** STORE THE UNMATCHED AGENT RECORDS;
DATA SASMATCH.NOMATCH;
  SET NOMATCH3;
RUN;
** COMBINE THOSE RECORDS THAT WERE MATCHED AND NOT MATCHED AND APPEND;
** THE AGENT MATCH3 FILE SO THAT THESE RECORDS CAN BE RE-CYCLED BACK;
** THROUGH THIS SAME PROGRAM FOR THE NEXT RUN. AGENT MATCH3 FILE ;
** WILL HAVE TO BE RE-NAMED TO AGENT MATCH2 FILE IF ALL GOES WELL IN ;
** EXECUTION OF THIS JOB. PLUS THESE NEW RECORDS WILL HAVE TO BE ;
** UPDATED WITH THE HSB PRODUCER NUMBER WHEN RESEARCH HAS BEEN ;
** COMPLETED;
** MATCHED RECORDS FIRST, THEN THE UNMATCHED RECORDS;
DATA SAVE;
  SET SAVE
      NOMATCH3;
RUN;
** ORDER TARGET BY COMPANY AND AGENT CODE (SORTED IN PLACE);
PROC SORT DATA=TARGET;
  BY COMPCODE TARCODE;
RUN;
** WRITE ONE LINE PER COMPANY AND AGENT CODE TO PRIORMAT;
DATA _NULL_;
  SET TARGET;
  BY COMPCODE TARCODE;
  IF NOT FIRST.TARCODE THEN DELETE;
  ** MOD APPENDS TO PRIORMAT INSTEAD OF REPLACING IT;
  FILE PRIORMAT NOPRINT NOTITLES MOD;
  ** SPECIAL IS WRITTEN LAST AT COLUMN 8, SO IT AND THE BLANK THAT;
  ** FOLLOWS OVERLAY THE START OF TARPART1;
  PUT @1   7*'_'
      @8   TARPART1 $CHAR200.
      @208 TARPART2 $CHAR71.
      @8   SPECIAL $14. ' ' ;
RUN;
** STORE THE COMBINED MATCHED AND UNMATCHED RECORDS;
DATA SASMATCH.MATCH3;
  SET SAVE;
RUN;
** MORE CODE - NEEDED;