iOS 使用正則表達式去掉 HTML 中的標籤元素,獲得純文本
content是根據網址獲得的網頁源碼字符串
// Splits the HTML source held in `content` into its plain-text fragments.
//
// `content` is declared by the enclosing code (not visible in this snippet) and
// is expected to be an NSString containing page markup. The result is a mutable
// array holding every non-empty run of text found between tags / line breaks,
// in document order.

// Nothing to extract from a nil or empty string; NSRegularExpression must not
// be handed a nil string, so bail out early with an empty result.
if (content.length == 0) {
    return [NSMutableArray array];
}

// Matches any HTML tag ("<...>") or a newline character. The pattern is a
// compile-time constant known to be valid, so the error out-parameter is unused.
NSRegularExpression *tagExpression =
    [NSRegularExpression regularExpressionWithPattern:@"<[^>]*>|\n"
                                              options:0
                                                error:nil];

// Replace every tag / newline with a newline. Because the pattern consumes all
// newlines that were present, "\n" can only appear where a match was replaced,
// which makes it a safe separator. (Using "-" as the separator would also split
// the text at hyphens that belong to the page content, e.g. "e-mail".)
NSString *delimited =
    [tagExpression stringByReplacingMatchesInString:content
                                            options:0
                                              range:NSMakeRange(0, content.length)
                                       withTemplate:@"\n"];

// Split on the separator and drop the empty pieces produced by adjacent tags,
// so runs of separators never need to be collapsed beforehand.
NSMutableArray *marr =
    [NSMutableArray arrayWithArray:[delimited componentsSeparatedByString:@"\n"]];
[marr removeObject:@""];
return marr;
// Removes HTML tags and newline characters from `string` with a regular
// expression and returns the remaining plain text.
// A nil or empty `string` is returned unchanged.
- (NSString *)getZZwithString:(NSString *)string {
    // NSRegularExpression must not be given a nil string; there is also nothing
    // to strip from an empty one.
    if (string.length == 0) {
        return string;
    }

    // Matches any HTML tag ("<...>") or a newline character. The pattern is a
    // constant known to be valid, so the error out-parameter is not needed.
    NSRegularExpression *tagExpression =
        [NSRegularExpression regularExpressionWithPattern:@"<[^>]*>|\n"
                                                  options:0
                                                    error:nil];

    // Options are 0: NSMatchingReportProgress only affects block enumeration.
    return [tagExpression stringByReplacingMatchesInString:string
                                                   options:0
                                                     range:NSMakeRange(0, string.length)
                                              withTemplate:@""];
}