1、先將QQ群的消息記錄以.txt文件格式導出來,保存路徑及名稱自己定義(在本文我導出到Y盤,命名為test.txt)
2、程序如下:
/*---------------------------------------------------------------------------
  QQ group chat statistics.

  Input : Y:\test.txt  - chat history exported from QQ as plain text.
  Output: Y:\statistics.rtf - one row per speaker with the number of
          messages posted and the share (in percent) of all messages.

  Flow:
    1. statistics1 - message headers whose sender id looks like "(digits)".
    2. statistics2 - message headers whose sender id looks like "<e-mail>".
    3. qunname     - group name read from line 6 of the export, stored in
                     the macro variable _qunname.
    4. statistics  - both extracts stacked, sender id 10000 removed.
    5. match       - last display name seen for each sender id.
    6. rtf         - frequency and rate per sender id, joined with the name.
    7. PROC REPORT - renders the table through ODS RTF.
---------------------------------------------------------------------------*/

/* Step 1: senders written as "name(digits)". */
data statistics1;
    if _n_ = 1 then do;
        /* patternid1 matches the blank before the name, the name and the id;
           patternid2 matches only the parenthesised id. */
        patternid1 = prxparse("/\s\w*-*\w*-*\w*\(\d+\)/");
        patternid2 = prxparse("/\(\d+\)/");
    end;
    retain patternid1 patternid2;

    infile 'Y:\test.txt' truncover;
    input string $200.;

    /* qq is wide enough to also hold e-mail style ids; the same length is
       used in statistics2 so the two data sets stack cleanly. */
    length name $50 qq $60;

    call prxsubstr(patternid1, string, start1, length1);
    call prxsubstr(patternid2, string, start2, length2);

    /* PRXSUBSTR returns position 0 when there is no match. */
    if start1 > 0 and start2 > 0 then do;
        /* Skip the leading blank matched by \s. */
        name = substrn(string, start1 + 1, length1 - 1);
        /* Strip the surrounding parentheses. */
        qq   = substrn(string, start2 + 1, length2 - 2);
        output;
    end;
run;

/* Step 2: senders written as "name<e-mail>". */
data statistics2;
    if _n_ = 1 then do;
        /* The dot before the top-level domain is escaped, and dots and
           hyphens are allowed in the local part and in the domain, so
           addresses such as a.b@vip.qq.com are recognised. */
        patternid1 = prxparse("/\s\w*-*\w*-*\w*<[\w.-]+@[\w.-]+\.\w+>/");
        patternid2 = prxparse("/<[\w.-]+@[\w.-]+\.\w+>/");
    end;
    retain patternid1 patternid2;

    infile 'Y:\test.txt' truncover;
    input string $200.;

    length name $50 qq $60;

    call prxsubstr(patternid1, string, start1, length1);
    call prxsubstr(patternid2, string, start2, length2);

    if start1 > 0 and start2 > 0 then do;
        name = substrn(string, start1 + 1, length1 - 1);
        /* Strip the surrounding angle brackets. */
        qq   = substrn(string, start2 + 1, length2 - 2);
        output;
    end;
run;

/* Step 3: group name, taken from line 6 of the export. */
data qunname;
    infile 'Y:\test.txt' truncover firstobs=6 obs=6;
    /* Without an explicit length, list input would create an 8-byte variable
       and stop at the first blank, truncating most group names. */
    length qunname $100;
    input @'消息對象:' qunname $100.;
    /* SYMPUTX strips leading and trailing blanks from the stored value. */
    call symputx('_qunname', qunname);
run;

/* Step 4: stack both extracts, drop sender id 10000, keep arrival order. */
data statistics(drop=patternid1 patternid2 string start1 start2 length1 length2);
    set statistics1 statistics2;
    where qq ^= '10000';
    n = _n_;
run;

proc sort data=statistics;
    by qq n;
run;

/* Step 5: keep the last display name recorded for each sender id. */
data match;
    set statistics;
    by qq n;
    if last.qq then output;
    drop n;
run;

/* Total number of messages. NOBS= is filled in at compile time, so the
   macro variable is created even when STATISTICS has no observations. */
data _null_;
    if 0 then set statistics nobs=t;
    call symputx('_nobs', t);
    stop;
run;

/* Step 6: messages per sender and their share of all messages. */
proc sql;
    create table rtf as
        select qq,
               n(qq)                 as frequency,
               n(qq) / &_nobs. * 100 as rate
        from statistics
        group by qq
        order by 1;
quit;

data rtf;
    merge rtf match;
    by qq;
run;

proc sort data=rtf;
    by descending frequency;
run;

/* Step 7: write the report. */
options nodate nonumber;
ods results=off;
title;
footnote;
ods listing close;
/* NOTE(review): the style Mystyles is not defined in this program; it is
   presumably a user-defined template that must already exist - confirm. */
ods rtf file='Y:\statistics.rtf' style=Mystyles bodytitle;

proc report data=rtf nowindows
    style(report)={font_size=10.5pt
                   pretext="QQ群(&_qunname.)成員發言次數及頻率統計分析表"
                   posttext="Author:liyongzhao,Created Date:2013-9-12."
                   just=left}
    style(column)={font=('times new roman',12pt)};
    column name frequency rate;
    define name      / center style(column)={cellwidth=15%} 'QQ群成員';
    define frequency / center style(column)={cellwidth=5%}  '發言次數';
    define rate      / center style(column)={cellwidth=5%}  format=6.3 '發言頻率(%)';
run;

ods rtf close;
3、程序運行完畢後,打開 Y:\statistics.rtf 即可查看結果。
結果類似下圖(截取開頭部分,隱去QQ群名稱和個人QQ號碼):