1. 寫在前面
微信公眾號:[double12gzh]
關注容器技術、關注Kubernetes。問題或建議,請公眾號留言。
在上一篇文章中,我們介紹了GoLang中字符串不同的比較方法,同時也使用一種比較簡單粗暴的方法來一起看了下不同方法的執行時間。
在本文中,我們還是會針對前面提到的不同的比較方法,這次,我們使用benchmark的方式來看一下,不同的比較方法的效率。
2. Benchmark測試
2.1 準備測試代碼
hello/utils/str/compare.go
package str
import (
"main/utils/reader"
"strings"
)
// BasicCompare reports whether src and dest are identical. The comparison is
// case-sensitive and uses the == operator.
func BasicCompare(src, dest string) bool {
	// Strings of different byte lengths can never be equal.
	sameLen := len(src) == len(dest)
	return sameLen && src == dest
}
// CompareCompare reports whether src and dest are identical. The comparison
// is case-sensitive and uses strings.Compare.
func CompareCompare(src, dest string) bool {
	// The length test runs first; strings.Compare is only called for strings
	// of equal byte length.
	return len(src) == len(dest) && strings.Compare(src, dest) == 0
}
// CaseInSensitiveBasicCompare reports whether src and dest are equal when
// letter case is ignored. Both strings are lower-cased with strings.ToLower
// and the results are compared with ==.
//
// There is deliberately no byte-length pre-check: the upper- and lower-case
// forms of a letter may have different UTF-8 lengths (for example the Kelvin
// sign U+212A lower-cases to "k"), so unequal byte lengths do not imply that
// the strings differ once case is ignored.
func CaseInSensitiveBasicCompare(src, dest string) bool {
	return strings.ToLower(src) == strings.ToLower(dest)
}
// CaseInSensitiveCompareCompare reports whether src and dest are equal when
// letter case is ignored. Both strings are lower-cased with strings.ToLower
// and the results are compared with strings.Compare.
//
// There is deliberately no byte-length pre-check: the upper- and lower-case
// forms of a letter may have different UTF-8 lengths (for example the Kelvin
// sign U+212A lower-cases to "k"), so unequal byte lengths do not imply that
// the strings differ once case is ignored.
func CaseInSensitiveCompareCompare(src, dest string) bool {
	return strings.Compare(strings.ToLower(src), strings.ToLower(dest)) == 0
}
// CaseInSensitiveEqualFoldCompare reports whether a and b are equal when
// letter case is ignored, using strings.EqualFold.
//
// There is deliberately no byte-length pre-check: strings.EqualFold works on
// Unicode case folding, and characters that fold to each other may have
// different UTF-8 lengths (for example the Kelvin sign U+212A and "k"), so
// unequal byte lengths do not imply that the strings differ.
func CaseInSensitiveEqualFoldCompare(a string, b string) bool {
	return strings.EqualFold(a, b)
}
hello/utils/str/compare_test.go
package str
import (
"testing"
)
// BenchmarkBasicCompare measures BasicCompare on two strings of equal length
// that differ only in the case of their final character.
func BenchmarkBasicCompare(b *testing.B) {
	const (
		left  = "you are dog sun, sb wu"
		right = "you are dog sun, sb wU"
	)
	for i := 0; i < b.N; i++ {
		BasicCompare(left, right)
	}
}
// BenchmarkCompareCompare measures CompareCompare on two strings of equal
// length that differ only in the case of their final character.
func BenchmarkCompareCompare(b *testing.B) {
	const (
		left  = "you are dog sun, sb wu"
		right = "you are dog sun, sb wU"
	)
	for i := 0; i < b.N; i++ {
		CompareCompare(left, right)
	}
}
// BenchmarkCaseInSensitiveBasicCompare measures CaseInSensitiveBasicCompare
// on two strings of equal length that differ only in the case of their final
// character.
func BenchmarkCaseInSensitiveBasicCompare(b *testing.B) {
	const (
		left  = "you are dog sun, sb wu"
		right = "you are dog sun, sb wU"
	)
	for i := 0; i < b.N; i++ {
		CaseInSensitiveBasicCompare(left, right)
	}
}
// BenchmarkCaseInSensitiveCompareCompare measures
// CaseInSensitiveCompareCompare on two strings of equal length that differ
// only in the case of their final character.
func BenchmarkCaseInSensitiveCompareCompare(b *testing.B) {
	const (
		left  = "you are dog sun, sb wu"
		right = "you are dog sun, sb wU"
	)
	for i := 0; i < b.N; i++ {
		CaseInSensitiveCompareCompare(left, right)
	}
}
// BenchmarkCaseInSensitiveEqualFoldCompare measures
// CaseInSensitiveEqualFoldCompare on two strings of equal length that differ
// only in the case of their final character.
func BenchmarkCaseInSensitiveEqualFoldCompare(b *testing.B) {
	const (
		left  = "you are dog sun, sb wu"
		right = "you are dog sun, sb wU"
	)
	for i := 0; i < b.N; i++ {
		CaseInSensitiveEqualFoldCompare(left, right)
	}
}
2.2 執行benchmark測試
jeffrey@DESKTOP-41NV243 MINGW64 ~/Desktop/hello/utils/str
$ go test -bench=.
goos: windows
goarch: amd64
pkg: main/utils/str
BenchmarkBasicCompare-12 379284132 3.13 ns/op
BenchmarkCompareCompare-12 190776010 6.01 ns/op
BenchmarkCaseInSensitiveBasicCompare-12 12153535 105 ns/op
BenchmarkCaseInSensitiveCompareCompare-12 11950690 100 ns/op
BenchmarkCaseInSensitiveEqualFoldCompare-12 33408315 35.6 ns/op
PASS
ok main/utils/str 7.224s
2.3 結論
根據上面的壓測結果,我們可以很容易的得出以下兩點結論:
- 區分大小寫的情況,使用==比使用strings.Compare效率更高
- 不區分大小寫的情況,使用EqualFold效率更高
3. 其它測試
通過使用上一節產生的數據樣本,我們進行本節的測試,來看一下不同的比較的方法的執行效率。很明顯,這樣測試時,得出的執行時間肯定是會受到文件讀取時間的影響。
在本節的測試中,其思路如下:
- 打開文件
- 一行一行獲取文本並進行比較
3.1 前提條件
- 有一個含有200000行的樣本數據
- 目標字符串與樣本最後一行的內容完全一樣
3.2 優點
- 更加切合生產場景
- 可以使用更多的字符串實現不同場景的比較
3.3 測試代碼
hello/utils/str/compare.go
// ReadAndBasicCompare passes fileName and dest to reader.ReadFileAndCompare
// with BasicCompare (case-sensitive, ==) as the comparison function and
// returns that call's result.
func ReadAndBasicCompare(fileName, dest string) bool {
	matched := reader.ReadFileAndCompare(fileName, dest, BasicCompare)
	return matched
}
// ReadAndCompareCompare passes fileName and dest to reader.ReadFileAndCompare
// with CompareCompare (case-sensitive, strings.Compare) as the comparison
// function and returns that call's result.
func ReadAndCompareCompare(fileName, dest string) bool {
	matched := reader.ReadFileAndCompare(fileName, dest, CompareCompare)
	return matched
}
// ReadAndCaseInSensitiveBasicCompare passes fileName and dest to
// reader.ReadFileAndCompare with CaseInSensitiveBasicCompare
// (case-insensitive, ==) as the comparison function and returns that call's
// result.
func ReadAndCaseInSensitiveBasicCompare(fileName, dest string) bool {
	matched := reader.ReadFileAndCompare(fileName, dest, CaseInSensitiveBasicCompare)
	return matched
}
// ReadAndCaseInSensitiveCompareCompare passes fileName and dest to
// reader.ReadFileAndCompare with CaseInSensitiveCompareCompare
// (case-insensitive, strings.Compare) as the comparison function and returns
// that call's result.
func ReadAndCaseInSensitiveCompareCompare(fileName, dest string) bool {
	matched := reader.ReadFileAndCompare(fileName, dest, CaseInSensitiveCompareCompare)
	return matched
}
// ReadAndCaseInSensitiveEqualFoldCompare passes fileName and dest to
// reader.ReadFileAndCompare with CaseInSensitiveEqualFoldCompare
// (case-insensitive, strings.EqualFold) as the comparison function and
// returns that call's result.
func ReadAndCaseInSensitiveEqualFoldCompare(fileName, dest string) bool {
	matched := reader.ReadFileAndCompare(fileName, dest, CaseInSensitiveEqualFoldCompare)
	return matched
}
hello/utils/reader/fileReader.go
// ReadFileAndCompare opens fileName, scans it line by line and calls
// ComPareFunc(line, dest) once for every line.
//
// The returned value is the outcome of ComPareFunc for the last line of the
// file; it is false when the file contains no lines. Scanning does not stop
// at the first match, so every line is always compared.
//
// It panics if the file cannot be opened or if scanning stops because of an
// error (for example a read failure or a line longer than the scanner's
// token limit), because a result from a partially scanned file would be
// misleading.
func ReadFileAndCompare(fileName string, dest string, ComPareFunc func(a, b string) bool) bool {
	f, err := os.Open(fileName)
	if err != nil {
		panic(err)
	}
	// Deferred so the handle is released even when ComPareFunc or the scan
	// error path panics.
	defer f.Close()

	// bufio.Scanner splits on lines by default.
	scan := bufio.NewScanner(f)

	result := false
	for scan.Scan() {
		result = ComPareFunc(scan.Text(), dest)
	}
	// Scan returns false on both EOF and failure; Err tells the two apart.
	if err := scan.Err(); err != nil {
		panic(err)
	}
	return result
}
hello/utils/str/compare_test.go
// BenchmarkReadAndBasicCompare measures ReadAndBasicCompare against the mock
// data file; the file is opened and scanned again on every iteration.
func BenchmarkReadAndBasicCompare(b *testing.B) {
	const (
		dataFile = "../../mock_data.txt"
		target   = "79rIt3yb8zp5EMRw"
	)
	for i := 0; i < b.N; i++ {
		ReadAndBasicCompare(dataFile, target)
	}
}
// BenchmarkReadAndCompareCompare measures ReadAndCompareCompare against the
// mock data file; the file is opened and scanned again on every iteration.
func BenchmarkReadAndCompareCompare(b *testing.B) {
	const (
		dataFile = "../../mock_data.txt"
		target   = "79rIt3yb8zp5EMRw"
	)
	for i := 0; i < b.N; i++ {
		ReadAndCompareCompare(dataFile, target)
	}
}
// BenchmarkReadAndCaseInSensitiveBasicCompare measures
// ReadAndCaseInSensitiveBasicCompare against the mock data file; the file is
// opened and scanned again on every iteration.
func BenchmarkReadAndCaseInSensitiveBasicCompare(b *testing.B) {
	const (
		dataFile = "../../mock_data.txt"
		target   = "79rIt3yb8zp5EMRw"
	)
	for i := 0; i < b.N; i++ {
		ReadAndCaseInSensitiveBasicCompare(dataFile, target)
	}
}
// BenchmarkReadAndCaseInSensitiveCompareCompare measures
// ReadAndCaseInSensitiveCompareCompare against the mock data file; the file
// is opened and scanned again on every iteration.
func BenchmarkReadAndCaseInSensitiveCompareCompare(b *testing.B) {
	const (
		dataFile = "../../mock_data.txt"
		target   = "79rIt3yb8zp5EMRw"
	)
	for i := 0; i < b.N; i++ {
		ReadAndCaseInSensitiveCompareCompare(dataFile, target)
	}
}
// BenchmarkReadAndCaseInSensitiveEqualFoldCompare measures
// ReadAndCaseInSensitiveEqualFoldCompare against the mock data file; the file
// is opened and scanned again on every iteration.
func BenchmarkReadAndCaseInSensitiveEqualFoldCompare(b *testing.B) {
	const (
		dataFile = "../../mock_data.txt"
		target   = "79rIt3yb8zp5EMRw"
	)
	for i := 0; i < b.N; i++ {
		ReadAndCaseInSensitiveEqualFoldCompare(dataFile, target)
	}
}
3.4 執行測試
jeffrey@hacker ~/Desktop/hello/utils/str go test -bench=.
goos: windows
goarch: amd64
pkg: main/utils/str
...
BenchmarkReadAndBasicCompare
BenchmarkReadAndBasicCompare-12 128 9153796 ns/op
BenchmarkReadAndCompareCompare
BenchmarkReadAndCompareCompare-12 100 10240239 ns/op
BenchmarkReadAndCaseInSensitiveBasicCompare
BenchmarkReadAndCaseInSensitiveBasicCompare-12 27 41919633 ns/op
BenchmarkReadAndCaseInSensitiveCompareCompare
BenchmarkReadAndCaseInSensitiveCompareCompare-12 28 43576511 ns/op
BenchmarkReadAndCaseInSensitiveEqualFoldCompare
BenchmarkReadAndCaseInSensitiveEqualFoldCompare-12 124 9639406 ns/op
PASS
Process finished with exit code 0
3.5 結論
- 區分大小寫的情況,使用==比使用strings.Compare效率更高
- 不區分大小寫的情況,使用EqualFold效率更高
4. 添加長度判斷會提高效率嗎
思路: 增加長度判斷
在前面的所有代碼中,我們一開始就是針對字符串進行完全比較,其實,在比較之前,我們可以先獲取一下字符串的長度,如果長度不相等,那麼這兩個字符串肯定是不相等的。例如:
// CaseInSensitiveEqualFoldCompare reports whether a and b are equal ignoring
// letter case, using strings.EqualFold behind a length pre-check.
//
// Note: len counts bytes, so two strings of different byte length are
// reported as unequal without strings.EqualFold being consulted.
func CaseInSensitiveEqualFoldCompare(a string, b string) bool {
	sameLen := len(a) == len(b)
	return sameLen && strings.EqualFold(a, b)
}
修改代碼,加入長度判斷,然後執行benchmark測試得到的結果如下:
goos: windows
goarch: amd64
pkg: main/utils/str
BenchmarkBasicCompare
BenchmarkBasicCompare-12 382882960 3.14 ns/op
BenchmarkCompareCompare
BenchmarkCompareCompare-12 197837797 6.19 ns/op
BenchmarkCaseInSensitiveBasicCompare
BenchmarkCaseInSensitiveBasicCompare-12 11463978 104 ns/op
BenchmarkCaseInSensitiveCompareCompare
BenchmarkCaseInSensitiveCompareCompare-12 11909770 101 ns/op
BenchmarkCaseInSensitiveEqualFoldCompare
BenchmarkCaseInSensitiveEqualFoldCompare-12 32394259 35.3 ns/op
BenchmarkReadAndBasicCompare
BenchmarkReadAndBasicCompare-12 128 9199173 ns/op
BenchmarkReadAndCompareCompare
BenchmarkReadAndCompareCompare-12 100 10155251 ns/op
BenchmarkReadAndCaseInSensitiveBasicCompare
BenchmarkReadAndCaseInSensitiveBasicCompare-12 27 42963244 ns/op
BenchmarkReadAndCaseInSensitiveCompareCompare
BenchmarkReadAndCaseInSensitiveCompareCompare-12 27 44257270 ns/op
BenchmarkReadAndCaseInSensitiveEqualFoldCompare
BenchmarkReadAndCaseInSensitiveEqualFoldCompare-12 122 9650323 ns/op
PASS
不加長度判斷時的benchmark結果:
goos: windows
goarch: amd64
pkg: main/utils/str
BenchmarkBasicCompare
BenchmarkBasicCompare-12 370923894 3.11 ns/op
BenchmarkCompareCompare
BenchmarkCompareCompare-12 198499410 6.12 ns/op
BenchmarkCaseInSensitiveBasicCompare
BenchmarkCaseInSensitiveBasicCompare-12 12226632 108 ns/op
BenchmarkCaseInSensitiveCompareCompare
BenchmarkCaseInSensitiveCompareCompare-12 11799502 101 ns/op
BenchmarkCaseInSensitiveEqualFoldCompare
BenchmarkCaseInSensitiveEqualFoldCompare-12 33342316 35.5 ns/op
BenchmarkReadAndBasicCompare
BenchmarkReadAndBasicCompare-12 128 9153796 ns/op
BenchmarkReadAndCompareCompare
BenchmarkReadAndCompareCompare-12 100 10240239 ns/op
BenchmarkReadAndCaseInSensitiveBasicCompare
BenchmarkReadAndCaseInSensitiveBasicCompare-12 27 41919633 ns/op
BenchmarkReadAndCaseInSensitiveCompareCompare
BenchmarkReadAndCaseInSensitiveCompareCompare-12 28 43576511 ns/op
BenchmarkReadAndCaseInSensitiveEqualFoldCompare
BenchmarkReadAndCaseInSensitiveEqualFoldCompare-12 124 9639406 ns/op
PASS
結論: 增加長度判斷並沒有提高效率,部分用例反而還略有下降(各項差異都很小,基本在測量誤差範圍內)。需要注意的是,2.1節的benchmark中參與比較的兩個字符串長度相同,長度判斷並不會提前返回,所以看不到提升是符合預期的。
5. 總結
- 區分大小寫的情況,使用==效率更高
- 不區分大小寫的情況,使用EqualFold效率更高