在对 Velocity 做优化时,需要把 String 转换为 byte 数组,因此会用到一些针对 byte 数组的操作,例如在一个数组中查找子数组、数组替换、数组扩展等。下面这个类就提供了这样一组方法,而且性能还不错。
package com.taobao.sketch.util;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
/**
 * Utility methods for {@code byte[]} data: sub-array search (Knuth-Morris-Pratt),
 * search-and-replace, concatenation and range copying.
 * <p>
 * Author: junshan
 * Created Date: 2010-12-27 16:17:23
 */
public class ArrayUtil {

    /**
     * Replaces every non-overlapping occurrence of {@code search} that starts at or after
     * {@code startIndex} with {@code replace}, scanning left to right. Replaced bytes are
     * never re-scanned.
     *
     * @param org        the source array; never modified
     * @param search     the byte sequence to look for; an empty sequence matches nothing
     * @param replace    the byte sequence written in place of each match
     * @param startIndex index at which the search starts (negative values are treated as 0)
     * @return a new array with all replacements applied, or {@code org} itself when there
     *         is no match
     * @throws UnsupportedEncodingException never thrown by this implementation; kept in the
     *                                      signature so existing callers still compile
     */
    public static byte[] arrayReplace(byte[] org, byte[] search, byte[] replace, int startIndex)
            throws UnsupportedEncodingException {
        KMPMatcher matcher = new KMPMatcher();
        matcher.computeFailure4Byte(search);

        // Pass 1: count the matches so the result can be allocated exactly once.
        int matches = 0;
        int at = matcher.indexOf(org, startIndex);
        while (at != -1) {
            matches++;
            at = matcher.indexOf(org, at + search.length);
        }
        if (matches == 0) {
            return org;
        }

        // Pass 2: copy the untouched gaps and the replacement bytes into the result.
        byte[] result = new byte[org.length + matches * (replace.length - search.length)];
        int src = 0;
        int dst = 0;
        at = matcher.indexOf(org, startIndex);
        while (at != -1) {
            int gap = at - src;
            System.arraycopy(org, src, result, dst, gap);
            dst += gap;
            System.arraycopy(replace, 0, result, dst, replace.length);
            dst += replace.length;
            src = at + search.length;
            at = matcher.indexOf(org, src);
        }
        System.arraycopy(org, src, result, dst, org.length - src);
        return result;
    }

    /**
     * Concatenates two arrays.
     *
     * @param org the leading bytes
     * @param to  the bytes appended after {@code org}
     * @return a new array containing {@code org} followed by {@code to}
     */
    public static byte[] append(byte[] org, byte[] to) {
        byte[] newByte = new byte[org.length + to.length];
        System.arraycopy(org, 0, newByte, 0, org.length);
        System.arraycopy(to, 0, newByte, org.length, to.length);
        return newByte;
    }

    /**
     * Appends a single byte to an array.
     *
     * @param org the leading bytes
     * @param to  the byte appended after {@code org}
     * @return a new array containing {@code org} followed by {@code to}
     */
    public static byte[] append(byte[] org, byte to) {
        byte[] newByte = new byte[org.length + 1];
        System.arraycopy(org, 0, newByte, 0, org.length);
        newByte[org.length] = to;
        return newByte;
    }

    /**
     * Copies {@code append} into {@code org} in place, starting at index {@code from}.
     * The destination must already be large enough; it is not grown.
     *
     * @param org    the destination array, modified in place
     * @param from   the destination index of the first copied byte
     * @param append the bytes to copy
     */
    public static void append(byte[] org, int from, byte[] append) {
        System.arraycopy(append, 0, org, from, append.length);
    }

    /**
     * Copies the range {@code [from, to)} of {@code original} into a new array. When
     * {@code to} lies beyond the end of {@code original} the tail of the result is
     * zero-filled.
     *
     * @param original the source array
     * @param from     start index, inclusive
     * @param to       end index, exclusive
     * @return a new array of length {@code to - from}
     * @throws IllegalArgumentException if {@code from > to}
     */
    public static byte[] copyOfRange(byte[] original, int from, int to) {
        int newLength = to - from;
        if (newLength < 0) {
            throw new IllegalArgumentException(from + " > " + to);
        }
        byte[] copy = new byte[newLength];
        System.arraycopy(original, from, copy, 0, Math.min(original.length - from, newLength));
        return copy;
    }

    /**
     * Encodes characters with the named charset.
     *
     * @param encode the charset name, e.g. {@code "GBK"} or {@code "UTF-8"}
     * @param chars  the characters to encode
     * @return exactly the encoded bytes
     */
    public static byte[] char2byte(String encode, char... chars) {
        ByteBuffer encoded = Charset.forName(encode).encode(CharBuffer.wrap(chars));
        // The buffer's backing array may be larger than the encoded content, so copy only
        // the bytes between position and limit instead of returning array().
        byte[] bytes = new byte[encoded.remaining()];
        encoded.get(bytes);
        return bytes;
    }

    /**
     * Finds the first occurrence of {@code search} in {@code org}.
     *
     * @param org    the array to search in
     * @param search the byte sequence to look for
     * @return the start index of the first occurrence, or -1 if there is none
     */
    public static int indexOf(byte[] org, byte[] search) {
        return indexOf(org, search, 0);
    }

    /**
     * Finds the first occurrence of {@code search} in {@code org} at or after
     * {@code startIndex}.
     *
     * @param org        the array to search in
     * @param search     the byte sequence to look for; an empty sequence is never found
     * @param startIndex index at which the search starts (negative values are treated as 0)
     * @return the start index of the first occurrence, or -1 if there is none
     */
    public static int indexOf(byte[] org, byte[] search, int startIndex) {
        KMPMatcher kmpMatcher = new KMPMatcher();
        kmpMatcher.computeFailure4Byte(search);
        return kmpMatcher.indexOf(org, startIndex);
    }

    /**
     * Finds the last occurrence of {@code search} in {@code org}.
     *
     * @param org    the array to search in
     * @param search the byte sequence to look for
     * @return the start index of the last occurrence, or -1 if there is none
     */
    public static int lastIndexOf(byte[] org, byte[] search) {
        return lastIndexOf(org, search, 0);
    }

    /**
     * Finds the last occurrence of {@code search} that starts at or after {@code fromIndex};
     * when there is none, falls back to the last occurrence lying entirely before
     * {@code fromIndex}. See {@link KMPMatcher#lastIndexOf(byte[], int)}.
     *
     * @param org       the array to search in
     * @param search    the byte sequence to look for; an empty sequence is never found
     * @param fromIndex index at which the forward scan starts
     * @return the start index of the selected occurrence, or -1 if there is none
     */
    public static int lastIndexOf(byte[] org, byte[] search, int fromIndex) {
        KMPMatcher kmpMatcher = new KMPMatcher();
        kmpMatcher.computeFailure4Byte(search);
        return kmpMatcher.lastIndexOf(org, fromIndex);
    }

    /**
     * Brute-force sub-array search, used only as the comparison baseline in
     * {@link #main(String[])}.
     */
    private static int naiveIndexOf(byte[] org, byte[] search) {
        if (search.length == 0) {
            return -1;
        }
        for (int start = 0; start + search.length <= org.length; start++) {
            int k = 0;
            while (k < search.length && org[start + k] == search[k]) {
                k++;
            }
            if (k == search.length) {
                return start;
            }
        }
        return -1;
    }

    /**
     * Knuth-Morris-Pratt matcher over byte arrays.
     * <p>
     * Call {@link #computeFailure4Byte(byte[])} once with the pattern, then run any number
     * of searches. Instances hold the pattern and its failure table.
     * <p>
     * Created on 2011-1-3
     */
    static class KMPMatcher {
        /** failure[i] = length of the longest proper prefix of the pattern that is also a suffix of pattern[0..i]. */
        private int[] failure;
        private byte[] bytePattern;

        /**
         * Finds the first occurrence of the pattern at or after {@code startIndex}.
         *
         * @param text       the array to search in
         * @param startIndex index at which the scan starts (negative values are treated as 0)
         * @return the start index of the first occurrence, or -1 when the text or the
         *         pattern is empty, {@code startIndex} is past the end, or nothing matches
         */
        public int indexOf(byte[] text, int startIndex) {
            if (text.length == 0 || startIndex > text.length || bytePattern.length == 0) {
                return -1;
            }
            int matched = 0;
            for (int i = Math.max(startIndex, 0); i < text.length; i++) {
                while (matched > 0 && bytePattern[matched] != text[i]) {
                    matched = failure[matched - 1];
                }
                if (bytePattern[matched] == text[i]) {
                    matched++;
                }
                if (matched == bytePattern.length) {
                    return i - bytePattern.length + 1;
                }
            }
            return -1;
        }

        /**
         * Finds the last occurrence of the pattern that starts at or after
         * {@code startIndex}. If the scan reaches the end of the text without a match and
         * {@code startIndex} is not 0, the search restarts from the head and returns the
         * last occurrence lying entirely before {@code startIndex}.
         *
         * @param text       the array to search in
         * @param startIndex index at which the first scan starts (negative values are treated as 0)
         * @return the start index of the selected occurrence, or -1 when the text or the
         *         pattern is empty, {@code startIndex} is past the end, or nothing matches
         */
        public int lastIndexOf(byte[] text, int startIndex) {
            if (text.length == 0 || startIndex > text.length || bytePattern.length == 0) {
                return -1;
            }
            int from = Math.max(startIndex, 0);
            int found = lastMatchWithin(text, from, text.length);
            if (found == -1 && from != 0) {
                // Nothing from startIndex onwards: start over from the head. The second scan
                // begins with fresh matcher state so no match can span the wrap-around.
                found = lastMatchWithin(text, 0, from);
            }
            return found;
        }

        /**
         * Finds the last occurrence of the pattern that starts at or after
         * {@code startIndex}, without restarting from the head.
         *
         * @param text       the array to search in
         * @param startIndex index at which the scan starts (negative values are treated as 0)
         * @return the start index of the last occurrence, or -1 when the text or the pattern
         *         is empty, {@code startIndex} is past the end, or nothing matches
         */
        public int lastIndexOfWithNoLoop(byte[] text, int startIndex) {
            if (text.length == 0 || startIndex > text.length || bytePattern.length == 0) {
                return -1;
            }
            return lastMatchWithin(text, Math.max(startIndex, 0), text.length);
        }

        /**
         * Stores the pattern and builds its KMP failure table.
         *
         * @param patternStr the pattern to search for in later calls
         */
        public void computeFailure4Byte(byte[] patternStr) {
            bytePattern = patternStr;
            int len = bytePattern.length;
            failure = new int[len];
            int j = 0;
            for (int i = 1; i < len; i++) {
                while (j > 0 && bytePattern[j] != bytePattern[i]) {
                    j = failure[j - 1];
                }
                if (bytePattern[j] == bytePattern[i]) {
                    j++;
                }
                failure[i] = j;
            }
        }

        /**
         * Scans {@code text[from, to)} and returns the start of the last occurrence lying
         * entirely inside that range, or -1. Overlapping occurrences are considered.
         * The pattern must be non-empty.
         */
        private int lastMatchWithin(byte[] text, int from, int to) {
            int last = -1;
            int matched = 0;
            for (int i = from; i < to; i++) {
                while (matched > 0 && bytePattern[matched] != text[i]) {
                    matched = failure[matched - 1];
                }
                if (bytePattern[matched] == text[i]) {
                    matched++;
                }
                if (matched == bytePattern.length) {
                    last = i - bytePattern.length + 1;
                    // Fall back through the failure table (not to 0) so that an
                    // overlapping occurrence further right is still detected.
                    matched = failure[matched - 1];
                }
            }
            return last;
        }
    }

    /**
     * Small timing demo: compares the KMP based {@link #indexOf(byte[], byte[], int)} with
     * a brute-force search on a short sample and prints the average nanoseconds per call
     * and the index each search returned.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Charset gbk = Charset.forName("GBK");
        byte[] org = "kadeadedcfdededghkk".getBytes(gbk);
        byte[] search = "kk".getBytes(gbk);
        final int rounds = 10000;
        long kmpNanos = 0;
        long naiveNanos = 0;
        int f1 = 0;
        int f2 = 0;
        for (int i = 0; i < rounds; i++) {
            long s1 = System.nanoTime();
            f1 = indexOf(org, search, 0);
            long s2 = System.nanoTime();
            f2 = naiveIndexOf(org, search);
            long s3 = System.nanoTime();
            kmpNanos += s2 - s1;
            naiveNanos += s3 - s2;
        }
        System.out.println("kmp=" + kmpNanos / rounds + ",naive=" + naiveNanos / rounds);
        System.out.println("f1=" + f1 + ",f2=" + f2);
    }
}
.net
在 stream流 和 byte[] 中查找(搜索)指定字符串
这里着重看的是两个 Search 扩展方法:一个是 Stream 类型的扩展,另一个是 byte[] 类型的扩展。
如果大家有更好的“算法”,请给回复,我们一起优化!
-- 常用扩展代码,需要这部分代码的支持!
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Drawing;
namespace Ims.Bll
{
    /// <summary>
    /// Extension methods that convert between <see cref="Stream"/>, <see cref="string"/>
    /// and <c>byte[]</c>, and that search a stream or byte array for a byte sequence.
    /// </summary>
    public static class StreamExtend
    {
        #region Stream extensions

        /// <summary>
        /// Reads the whole stream into a byte array.
        /// </summary>
        /// <param name="stream">A seekable stream with a known length.</param>
        /// <returns>
        /// An array of <c>stream.Length</c> bytes holding the stream content from its
        /// beginning. The stream is left positioned at its beginning.
        /// </returns>
        public static byte[] ToByteArray(this Stream stream)
        {
            byte[] bytes = new byte[stream.Length];

            // Always read from the start: the caller may have left the position anywhere.
            stream.Seek(0, SeekOrigin.Begin);

            // Stream.Read may return fewer bytes than requested, so keep reading
            // until the buffer is full or the stream reports end-of-data.
            int filled = 0;
            while (filled < bytes.Length)
            {
                int read = stream.Read(bytes, filled, bytes.Length - filled);
                if (read <= 0)
                    break;
                filled += read;
            }

            // Leave the stream positioned at its beginning.
            stream.Seek(0, SeekOrigin.Begin);
            return bytes;
        }

        /// <summary>
        /// Creates an image from the stream by constructing a <see cref="Bitmap"/> over it.
        /// </summary>
        /// <param name="stream">A stream containing image data.</param>
        /// <returns>The decoded image.</returns>
        public static Image ToImage(this Stream stream)
        {
            Image img = new Bitmap(stream);
            return img;
        }

        /// <summary>
        /// Reads the whole stream and decodes it with <c>Encoding.Default</c>.
        /// </summary>
        /// <param name="stream">A seekable stream with a known length.</param>
        /// <returns>The decoded text.</returns>
        public static string ToStr(this Stream stream)
        {
            return System.Text.Encoding.Default.GetString(stream.ToByteArray());
        }

        /// <summary>
        /// Searches the stream for the first occurrence of a byte sequence.
        /// The stream position is changed by the search.
        /// </summary>
        /// <param name="stream">The seekable stream to search.</param>
        /// <param name="beginPosition">Offset at which the search starts.</param>
        /// <param name="key">The byte sequence to look for.</param>
        /// <returns>
        /// The offset of the first occurrence at or after <paramref name="beginPosition"/>,
        /// or -1 when there is none, when an argument is null, or when
        /// <paramref name="beginPosition"/> lies outside the stream.
        /// </returns>
        public static long Search(this Stream stream, long beginPosition, byte[] key)
        {
            if (stream == null || key == null)
                return -1;

            // Query the length once; for some streams this property is not free.
            long length = stream.Length;
            if (beginPosition < 0 || beginPosition >= length || key.Length > length)
                return -1;

            long lastStart = length - key.Length;
            for (long start = beginPosition; start <= lastStart; start++)
            {
                stream.Seek(start, SeekOrigin.Begin);
                int matched = 0;
                while (matched < key.Length)
                {
                    int current = stream.ReadByte();
                    if (current == -1)
                        return -1; // the stream ended earlier than its reported length
                    if (current != key[matched])
                        break;
                    matched++;
                }
                if (matched == key.Length)
                    return start;
            }
            return -1;
        }

        #endregion

        #region byte[] extensions

        /// <summary>
        /// Wraps the byte array in a <see cref="MemoryStream"/> positioned at its beginning.
        /// </summary>
        /// <param name="arr">The bytes to expose as a stream.</param>
        /// <returns>A stream over <paramref name="arr"/>.</returns>
        public static Stream ToStream(this byte[] arr)
        {
            Stream stream = new MemoryStream(arr);
            stream.Seek(0, SeekOrigin.Begin);
            return stream;
        }

        /// <summary>
        /// Creates an image from the byte array via <c>Image.FromStream</c>.
        /// </summary>
        /// <param name="arr">Bytes containing image data.</param>
        /// <returns>The decoded image.</returns>
        public static Image ToImage(this byte[] arr)
        {
            return Image.FromStream(arr.ToStream());
        }

        /// <summary>
        /// Decodes the byte array with <c>Encoding.Default</c>.
        /// </summary>
        /// <param name="arr">The bytes to decode.</param>
        /// <returns>The decoded text.</returns>
        public static string ToStr(this byte[] arr)
        {
            return System.Text.Encoding.Default.GetString(arr);
        }

        /// <summary>
        /// Searches the byte array for the first occurrence of a byte sequence.
        /// </summary>
        /// <param name="arr">The array to search.</param>
        /// <param name="beginPosition">Index at which the search starts.</param>
        /// <param name="key">The byte sequence to look for.</param>
        /// <returns>
        /// The index of the first occurrence at or after <paramref name="beginPosition"/>,
        /// or -1 when there is none, when an argument is null, or when
        /// <paramref name="beginPosition"/> lies outside the array.
        /// </returns>
        public static int Search(this byte[] arr, int beginPosition, byte[] key)
        {
            if (arr == null || key == null)
                return -1;
            if (beginPosition < 0 || beginPosition >= arr.Length || key.Length > arr.Length)
                return -1;

            int lastStart = arr.Length - key.Length;
            for (int start = beginPosition; start <= lastStart; start++)
            {
                int matched = 0;
                while (matched < key.Length && arr[start + matched] == key[matched])
                    matched++;
                if (matched == key.Length)
                    return start;
            }
            return -1;
        }

        #endregion

        #region string extensions

        /// <summary>
        /// Encodes the string with <c>Encoding.Default</c>.
        /// </summary>
        /// <param name="str">The text to encode.</param>
        /// <returns>The encoded bytes.</returns>
        public static byte[] ToByteArray(this string str)
        {
            return System.Text.Encoding.Default.GetBytes(str);
        }

        /// <summary>
        /// Encodes the string with <c>Encoding.Default</c> and wraps the bytes in a
        /// <see cref="MemoryStream"/> positioned at its beginning.
        /// </summary>
        /// <param name="str">The text to expose as a stream.</param>
        /// <returns>A stream over the encoded bytes.</returns>
        public static Stream ToStream(this string str)
        {
            Stream stream = new MemoryStream(str.ToByteArray());
            stream.Seek(0, SeekOrigin.Begin);
            return stream;
        }

        #endregion
    }
}
------------------------
-- 测试脚本
// Sample data and search keys for exercising both Search extension methods.
// Expected offsets below assume Encoding.Default encodes each digit as a single byte.
byte[] arr = "0123456789111".ToByteArray();
byte[] key1 = "123".ToByteArray();
byte[] key2 = "678".ToByteArray();
byte[] key3 = "911".ToByteArray();
byte[] key4 = "111".ToByteArray();
// Search inside a stream.
// Expected: index1 = 1, index2 = 6, index3 = 9, index4 = 10.
Stream sm = arr.ToStream();
long index1 = sm.Search(0, key1);
long index2 = sm.Search(0, key2);
long index3 = sm.Search(0, key3);
long index4 = sm.Search(0, key4);
// Search inside a byte[].
// Expected: the same offsets as the stream search above (1, 6, 9, 10).
long index10 = arr.Search(0, key1);
long index20 = arr.Search(0, key2);
long index30 = arr.Search(0, key3);
long index40 = arr.Search(0, key4);