/* ---------------------------------------------------------------------- */
/* Codes for Ontario Health User Group Presentation                       */
/* on                                                                     */
/* Key word analysis                                                      */
/* ---------------------------------------------------------------------- */
/* SAS string-search functions referred to in the presentation:           */
/*   INDEX  - searches for patterns as separate words or as parts of      */
/*            words                                                       */
/*   INDEXC - searches the source for the first occurrence of any         */
/*            character present in the excerpts                           */
/*   INDEXW - searches for strings that are words                         */
/*   FIND   - searches for a specific substring of characters within a    */
/*            character string that you specify                           */
/*   FINDC  - searches for specific characters that either appear or do   */
/*            not appear within a character string that you specify       */
/*   INDEX  - searches a character expression for a string of characters  */
/*   INDEXC - searches a character expression for specific characters     */
/*   INDEXW - searches a character expression for a specified string as   */
/*            a word                                                      */
/* This program was created for SAS Institute presentation on             */
/* 01 April 2011 by Wilson Suraweera                                      */

%macro Kw_search(KW=, Var_KW_out=, Var_KW_in=, Data_IN=, Data_OUT=);
/*------------------------------------------------------------------------
  Flags each observation according to how strongly a keyword appears in a
  free-text variable.

  Parameters
    Data_IN    = input dataset name
    Data_OUT   = output dataset name (may be the same as Data_IN)
    KW         = keyword to search for, given as a quoted string,
                 e.g. KW='FEVER'; letter case does not matter
    Var_KW_in  = character variable holding the text to be searched
    Var_KW_out = name of the numeric output variable to create

  Output variable values
    1 = keyword found as a complete, blank-delimited word (most probable)
    2 = keyword found inside another word AND some word sounds like it
    3 = keyword found inside another word only
    4 = not found as text, but some word has the same SOUNDEX code
    0 = no match of any kind

  The helper variables Indx_W, Indx and S_ind are kept in the output
  dataset; all other working variables are dropped.

  NOTE: the macro generates a DATA step but does not close it; the caller
  must follow the macro call with RUN; (see the examples below).
------------------------------------------------------------------------*/
data &Data_OUT;
    set &Data_IN;

    /* Working copies are sized explicitly so that no word is truncated   */
    /* before it is compared.                                             */
    length up_KWord KW_word $ 255;

    /* Upper-case the first 255 characters of the text. Assigning into a  */
    /* $255 variable truncates silently, so text variables shorter than   */
    /* 255 characters do not trigger an invalid-argument note from SUBSTR.*/
    /* Change the length above to search a longer or shorter span.        */
    up_KWord = upcase(&Var_KW_in);

    /* The keyword is upper-cased as well so that KW='fever' and          */
    /* KW='FEVER' behave identically.                                     */
    Indx_W = indexw(up_KWord, upcase(&KW));   /* Complete word */
    Indx   = index(up_KWord, upcase(&KW));    /* Complete word OR part of another word */

    SOUND_KW1 = soundex(&KW);                 /* e.g. KW="FEVER" */

    /* Count the words whose SOUNDEX code equals the keyword's code.      */
    /* The scan stops at the first blank result (no more words), so the   */
    /* number of words examined is not limited to a fixed array size.     */
    /* A keyword with an empty SOUNDEX code is skipped altogether;        */
    /* otherwise it would "sound like" every token that has no code.      */
    S_ind = 0;                                /* Keyword appearance indicator */
    if SOUND_KW1 ne ' ' then
        do i = 1 by 1 until (KW_word = ' ');
            KW_word = scan(up_KWord, i, ' '); /* i-th blank-delimited word */
            if KW_word ne ' ' then do;
                SOUND_KW2 = soundex(KW_word);
                S_ind = S_ind + (SOUND_KW2 = SOUND_KW1);
            end;
        end;

    drop i up_KWord KW_word SOUND_KW1 SOUND_KW2;   /* Drop temporary variables */

    /* Output variable */
    if (Indx_W > 0) then &Var_KW_out = 1;                  /* Most probable  */
    else if (Indx > 0 and S_ind > 0) then &Var_KW_out = 2; /* Probable       */
    else if (Indx > 0 and S_ind = 0) then &Var_KW_out = 3; /* Less probable  */
    else if (Indx = 0 and S_ind > 0) then &Var_KW_out = 4; /* Least probable */
    else &Var_KW_out = 0;
%mend;

/* ----------------------------------------------------------------- */
Libname text 'E:\SAS Text Analysis\Presentations';
/*options mprint;*/

/* Examples: */
%Kw_search(KW='HIGH', Var_KW_out=HIGH, Var_KW_in=KWord, Data_IN=text.t1, Data_OUT=t_out);
run;
%Kw_search(KW='FEVER', Var_KW_out=FEVER, Var_KW_in=KWord, Data_IN=t_out, Data_OUT=t_out);
run;
%Kw_search(KW='CHILL', Var_KW_out=CHILL, Var_KW_in=KWord, Data_IN=t_out, Data_OUT=t_out);
run;

/*
  Data_IN    = 'Input dataset name',
  Data_OUT   = 'Output dataset name',
  Var_KW_in  = 'Keyword variable name',
  KW         = 'Keyword' string to search for,
  Var_KW_out = 'Output variable name'
*/