QQ群成員發言次數統計(正則表達式版)


1、先將QQ群的消息記錄以.txt文件格式導出,保存路徑及文件名可自行決定(本文導出到Y盤,命名為test.txt;若使用其他路徑或文件名,請同步修改下面程序中的 'Y:\test.txt')

2、程序如下:

/* Scan the exported chat log for text of the form " nickname(digits)" and
   write one row per input line on which both patterns are found.
   name = the matched text without its leading whitespace character
          (it still contains the "(digits)" suffix);
   qq   = the digits inside the first "(digits)" group found on the line. */
data statistics1;
    /* Keep the compiled pattern ids across iterations so that each regular
       expression is parsed only once, on the first pass. */
    retain patternid1 patternid2;
    if _n_ = 1 then do;
        patternid1 = prxparse("/\s\w*-*\w*-*\w*\(\d+\)/");
        patternid2 = prxparse("/\(\d+\)/");
    end;

    infile 'Y:\test.txt' truncover;
    input string $200.;
    length name $50. qq $30.;

    /* PRXSUBSTR returns position 0 when the pattern is not found. */
    call prxsubstr(patternid1, string, start1, length1);
    call prxsubstr(patternid2, string, start2, length2);

    /* Subsetting IF: lines on which either pattern is missing are discarded
       here; every remaining line is written by the implicit OUTPUT. */
    if start1 > 0 and start2 > 0;

    /* Skip the leading whitespace character of the first match. */
    name = substrn(string, start1 + 1, length1 - 1);
    /* Strip the surrounding parentheses of the second match. */
    qq = substrn(string, start2 + 1, length2 - 2);
run;

/* Scan the exported chat log for text of the form " nickname<user@domain>"
   and write one row per input line on which both patterns are found.
   name = the matched text without its leading whitespace character
          (it still contains the "<address>" suffix);
   qq   = the e-mail address found between "<" and ">".

   Pattern fixes compared with the earlier version:
     - the dot before the top-level domain is escaped, so it no longer
       matches an arbitrary character;
     - one or more ".label" parts are accepted, so multi-label domains such
       as vip.qq.com are no longer skipped;
     - "." and "-" are accepted in the address; the redundant (\w*|\d*)
       alternation is removed (\d is already covered by \w). */
data statistics2;
    if _n_=1 then do;
        patternid1=prxparse("/\s\w*-*\w*-*\w*<[\w.-]*@[\w-]*(?:\.[\w-]+)+>/");
        patternid2=prxparse("/<[\w.-]*@[\w-]*(?:\.[\w-]+)+>/");
    end;
    /* Keep the compiled pattern ids across iterations of the DATA step. */
    retain patternid1 patternid2;
    infile 'Y:\test.txt' truncover;
    input string $200.;
    length name $50. qq $30.;
    /* PRXSUBSTR returns position 0 when the pattern is not found. */
    call prxsubstr(patternid1,string,start1,length1);
    call prxsubstr(patternid2,string,start2,length2);
    /* Both comparisons are written out; the earlier form
       "start1 and start2 gt 0" parsed as "start1 and (start2 gt 0)". */
    if start1 gt 0 and start2 gt 0 then do;
        /* Skip the leading whitespace character of the first match. */
        name=substrn(string,start1+1,length1-1);
        /* Strip the surrounding angle brackets of the second match. */
        qq=substrn(string,start2+1,length2-2);
        output;
    end;
run;

/* Read the group name from line 6 of the exported chat log and store it in
   the macro variable _qunname, which is used in the report title.
   NOTE(review): the marker text below must match the export file exactly
   (script and colon character included) - confirm against a real export. */
data qunname;
    infile 'Y:\test.txt' truncover firstobs=6 obs=6;
    /* An explicit length is required: plain list input ("qunname $") creates
       an 8-byte variable, which truncates multi-byte group names, and it
       also stops reading at the first blank. */
    length qunname $200;
    /* Formatted input reads the rest of the line after the marker;
       TRUNCOVER keeps short lines from spilling onto the next record. */
    input @'消息對象:' qunname $200.;
    /* SYMPUTX strips leading and trailing blanks, so no padding ends up
       inside the parentheses of the report title. */
    call symputx('_qunname',qunname);
run;

/* Stack the two extraction results into one table holding name, qq and a
   running row number n, which later steps use to pick the most recent name
   of each qq. */
data statistics;
    set statistics1 statistics2;
    /* Rows whose id is '10000' are excluded.
       NOTE(review): presumably a system/notification account - confirm. */
    where qq ne '10000';
    /* Position of the row in the concatenated, filtered input. */
    n = _n_;
    /* Working variables of the extraction steps are not needed any more. */
    drop patternid1 patternid2 string start1 start2 length1 length2;
run;

/* Order the messages by id and, within each id, by original row number. */
proc sort data=statistics;
    by qq n;
run;

/* Keep one row per qq: the one with the highest n, i.e. the name that
   appears last in the input for that id. */
data match(drop=n);
    set statistics;
    by qq n;
    /* Subsetting IF: only the last row of each qq group is written. */
    if last.qq;
run;

/* Store the number of rows of STATISTICS in the macro variable _nobs.
   NOBS= is filled in when the step is compiled, so the SET statement never
   has to execute ("if 0"). This matters for an empty table: an executed SET
   would end the step at end-of-file before the CALL routine runs, leaving
   _nobs undefined. SYMPUTX also converts the number to text without the
   implicit-conversion note and without leading blanks. */
data _null_;
    if 0 then set statistics nobs=t;
    call symputx("_nobs",t);
    stop;
run;

/* Per-id message count and its share (in percent) of all messages.
   The total number of messages comes from the macro variable _nobs. */
proc sql;
    create table rtf as
        select qq,
               count(qq) as frequency,
               count(qq) / &_nobs. * 100 as rate
        from statistics
        group by qq
        /* Sorted by id so the table can be merged BY qq afterwards. */
        order by qq;
quit;

/* Attach the display name from MATCH to each id's counts in RTF;
   both inputs are ordered by qq at this point. */
data rtf;
    merge rtf
          match;
    by qq;
run;

/* Most active members first. */
proc sort data=rtf;
    by descending frequency;
run;

/* Write the per-member statistics to an RTF document. */

/* Clear any title/footnote left over from earlier steps and suppress the
   date and page number on the output. */
title;
footnote;
options nodate nonumber;

/* Route output to the RTF destination only. */
ods results=off;
ods listing close;
/* NOTE(review): the style Mystyles is not defined in this program - it is
   assumed to exist in the session's template store; confirm. */
ods rtf file='Y:\statistics.rtf' style=Mystyles bodytitle;

/* The table heading (pretext) contains the group name taken from the macro
   variable _qunname; the credit line is printed below the table (posttext). */
proc report data=rtf nowindows
    style(report)={font_size=10.5pt pretext="QQ群(&_qunname.)成員發言次數及頻率統計分析表" posttext="Author:liyongzhao,Created Date:2013-9-12." just=left}
    style(column)={font=('times new roman',12pt)};

    column name frequency rate;

    define name      / 'QQ群成員' center style(column)={cellwidth=15%};
    define frequency / '發言次數' center style(column)={cellwidth=5%};
    define rate      / '發言頻率(%)' format=6.3 center style(column)={cellwidth=5%};
run;

ods rtf close;

3、打開Y盤下的statistics.rtf即可查看結果。

結果類似下圖(截取開頭部分,隱去QQ群名稱和個人QQ號碼):


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM