朴素貝葉斯算法
公式
P(類別|特征1,…,特征n) = P(類別)*P(特征1|類別)*…*P(特征n|類別) / Σ_c [ P(c)*P(特征1|c)*…*P(特征n|c) ]
解釋(例子)
給出如下訓練集:
編號,色澤,根蒂,敲聲,紋理,臍部,觸感,好瓜
1,青綠,蜷縮,濁響,清晰,凹陷,硬滑,是
2,烏黑,蜷縮,沉悶,清晰,凹陷,硬滑,是
3,烏黑,蜷縮,濁響,清晰,凹陷,硬滑,是
4,青綠,蜷縮,沉悶,清晰,凹陷,硬滑,是
5,淺白,蜷縮,濁響,清晰,凹陷,硬滑,是
6,青綠,稍蜷,濁響,清晰,稍凹,軟粘,是
7,烏黑,稍蜷,濁響,稍糊,稍凹,軟粘,是
8,烏黑,稍蜷,濁響,清晰,稍凹,硬滑,是
9,烏黑,稍蜷,沉悶,稍糊,稍凹,硬滑,否
10,青綠,硬挺,清脆,清晰,平坦,軟粘,否
11,淺白,硬挺,清脆,模糊,平坦,硬滑,否
12,淺白,蜷縮,濁響,模糊,平坦,軟粘,否
13,青綠,稍蜷,濁響,稍糊,凹陷,硬滑,否
14,淺白,稍蜷,沉悶,稍糊,凹陷,硬滑,否
15,烏黑,稍蜷,濁響,清晰,稍凹,軟粘,否
16,淺白,蜷縮,濁響,模糊,平坦,硬滑,否
17,青綠,蜷縮,沉悶,稍糊,稍凹,硬滑,否
判斷有如下特征的瓜是否好瓜:
青綠,稍蜷,濁響,清晰,凹陷,硬滑
分析
將特征和規律代入貝葉斯公式中:
P(好|青綠,稍蜷,濁響,清晰,凹陷,硬滑)
= P(好)*P(青綠,稍蜷,濁響,清晰,凹陷,硬滑|好) / (P(好)*P(青綠,稍蜷,濁響,清晰,凹陷,硬滑|好) + P(否)*P(青綠,稍蜷,濁響,清晰,凹陷,硬滑|否))
= P(好)*P(青綠|好)*P(稍蜷|好)*P(濁響|好)*P(清晰|好)*P(凹陷|好)*P(硬滑|好) / (P(好)*P(青綠|好)*P(稍蜷|好)*P(濁響|好)*P(清晰|好)*P(凹陷|好)*P(硬滑|好) + P(否)*P(青綠|否)*P(稍蜷|否)*P(濁響|否)*P(清晰|否)*P(凹陷|否)*P(硬滑|否))
其中,朴素貝葉斯假設各項特征在給定類別(好/否)的條件下相互獨立,故:
P(青綠,稍蜷,濁響,清晰,凹陷,硬滑|好) = P(青綠|好)*P(稍蜷|好)*P(濁響|好)*P(清晰|好)*P(凹陷|好)*P(硬滑|好)
P(青綠,稍蜷,濁響,清晰,凹陷,硬滑|否) = P(青綠|否)*P(稍蜷|否)*P(濁響|否)*P(清晰|否)*P(凹陷|否)*P(硬滑|否)
代碼實現:
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.io.*;
import java.util.Scanner;
/**
 * Naive Bayes classifier over a small table of categorical samples.
 *
 * <p>Each stored row holds {@value #FEATURE_COUNT} feature values followed by a
 * class label at index {@value #LABEL_INDEX}. Probabilities are plain relative
 * frequencies; no smoothing is applied, so a feature value never observed
 * together with a class drives that class's joint probability to zero.
 */
public class Test {
    /** Number of feature columns compared against a query. */
    private static final int FEATURE_COUNT = 6;

    /** Position of the class label inside a stored row (after the id column is dropped). */
    private static final int LABEL_INDEX = 6;

    /** Default location of the training data: {@code <user.dir>/src/sources/data.txt}. */
    static String filePath = System.getProperty("user.dir") + File.separator + "src"
            + File.separator + "sources" + File.separator + "data.txt";

    /** Training table shared by all instances; replaced by a successful {@link #readTable}. */
    static ArrayList<ArrayList<String>> data = new ArrayList<ArrayList<String>>();

    /**
     * Reads a comma-separated file into a table and, on success, stores it in {@link #data}.
     *
     * <p>The first field of every line is dropped; the remaining fields become one row.
     * Blank lines are skipped. The file is decoded with the platform default charset.
     *
     * @param filePath path of the file to read
     * @return the rows read; empty if the file could not be opened, partial if reading
     *         failed midway (in both failure cases {@link #data} is left unchanged)
     */
    public ArrayList<ArrayList<String>> readTable(String filePath) {
        ArrayList<ArrayList<String>> table = new ArrayList<>();
        // try-with-resources closes the reader (and the wrapped streams) on every path.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(filePath))))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    // A blank line would otherwise become an empty row and break label lookups.
                    continue;
                }
                String[] fields = line.split(",");
                ArrayList<String> row = new ArrayList<>();
                // Start at 1: field 0 is dropped and never stored.
                for (int i = 1; i < fields.length; i++) {
                    row.add(fields[i]);
                }
                table.add(row);
            }
            // Publish only a completely read table.
            data = table;
        } catch (FileNotFoundException e) {
            System.out.println("文件不存在!");
        } catch (IOException e) {
            System.out.println("讀取文件失敗: " + e.getMessage());
        }
        return table;
    }

    /**
     * Computes the denominator of Bayes' formula (the evidence): the sum, over every
     * distinct class label present in {@link #data}, of P(label) * prod_i P(feature_i | label).
     *
     * @param str the feature values of the sample being classified
     * @return the evidence; {@code 0.0} when the table is empty or no class matches all features
     */
    public double denominator(String[] str) {
        // Collect distinct labels in first-seen order; the label set is tiny, so a list suffices.
        ArrayList<String> labels = new ArrayList<>();
        for (ArrayList<String> row : data) {
            String label = row.get(LABEL_INDEX);
            if (!labels.contains(label)) {
                labels.add(label);
            }
        }
        double evidence = 0;
        for (String label : labels) {
            evidence += jointProbability(label, str);
        }
        return evidence;
    }

    /**
     * Computes the numerator of Bayes' formula for one class:
     * P(hao) * prod_i P(strs[i] | hao), estimated by relative frequency.
     *
     * @param hao  the class label to evaluate
     * @param strs the feature values of the sample being classified
     * @return the joint probability; {@code 0.0} when no row carries the label
     */
    public double moleculeIsCold(String hao, String[] strs) {
        return jointProbability(hao, strs);
    }

    /** Counts matches for one class in a single pass and multiplies prior by the likelihoods. */
    private double jointProbability(String label, String[] features) {
        int labelCount = 0;
        int[] matches = new int[FEATURE_COUNT];
        for (ArrayList<String> row : data) {
            if (!row.get(LABEL_INDEX).equals(label)) {
                continue;
            }
            labelCount++;
            for (int f = 0; f < FEATURE_COUNT; f++) {
                if (row.get(f).equals(features[f])) {
                    matches[f]++;
                }
            }
        }
        if (labelCount == 0) {
            // Avoid 0/0 = NaN: a class that never occurs has probability zero.
            return 0.0;
        }
        double probability = labelCount / (double) data.size();
        for (int count : matches) {
            probability *= count / (double) labelCount;
        }
        return probability;
    }

    /**
     * Compares the posterior probabilities of the two classes, prints both, and prints
     * and returns the verdict.
     *
     * @param strs the feature values of the sample being classified
     * @return {@code "好瓜"} when the posterior of label "是" is strictly larger,
     *         otherwise {@code "差瓜"}
     */
    public String compared(String[] strs) {
        double evidence = denominator(strs);
        // With zero evidence the posterior is undefined; report 0 for both instead of NaN.
        double d1 = evidence > 0 ? moleculeIsCold("是", strs) / evidence : 0.0;
        double d2 = evidence > 0 ? moleculeIsCold("否", strs) / evidence : 0.0;
        String str = d1 > d2 ? "好瓜" : "差瓜";
        System.out.println("好瓜的概率:" + d1);
        System.out.println("差瓜的概率:" + d2);
        System.out.println(str);
        return str;
    }

    /**
     * Reads six whitespace-separated feature values from standard input, loads the
     * training table from {@link #filePath}, and prints the classification.
     * Sample input: 青綠 稍蜷 濁響 清晰 凹陷 硬滑
     */
    public static void main(String[] args) {
        Scanner input = new Scanner(System.in);
        String[] strs = new String[FEATURE_COUNT];
        for (int i = 0; i < FEATURE_COUNT; i++) {
            strs[i] = input.next();
        }
        Test ba = new Test();
        ba.readTable(filePath);
        ba.compared(strs);
    }
}