常见的机器学习分类算法有支持向量机、决策树、KNN、朴素贝叶斯等。以前学习机器学习时简单学过这些算法,现在需要把它们用在图像分类上,一时间懵了,忘了怎么把图像转换成这些算法能处理的数值特征。后来在 GitHub 上找到一个不错的例子,特此记录一下。

完整代码如下:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import matplotlib as mpl

# Select the KaiTi typeface for both the sans-serif and the serif font families.
mpl.rcParams.update({
    'font.sans-serif': ['KaiTi'],
    'font.serif': ['KaiTi'],
})
# mpl.rcParams['axes.unicode_minus'] = False  # fixes the minus sign '-' showing as a box in saved figures (or convert the minus sign to a string)

# ----------------------------------------------------------------------------------
# Step 1: collect image paths and labels, then split them into train and test sets
# ----------------------------------------------------------------------------------

# Root folder of the dataset. It must contain one sub-folder per class, named
# "0", "1", ..., str(NUM_CLASSES - 1); the folder name is used as the class label.
DATASET_DIR = "photo2"
# Remember to change this to match the dataset (e.g. 4 or 10).
NUM_CLASSES = 10

X = []  # image file paths
Y = []  # class label of each image, parallel to X
Z = []  # image pixels (never filled in by the code visible here)

for i in range(NUM_CLASSES):
    class_dir = os.path.join(DATASET_DIR, str(i))
    # os.listdir returns entries in arbitrary order. Sorting makes the sample
    # order deterministic, so the fixed random_state below really yields the
    # same train/test split on every run and every machine.
    for f in sorted(os.listdir(class_dir)):
        X.append(os.path.join(class_dir, f))
        # The label is the name of the folder the image was found in.
        Y.append(i)

# Alternative: os.walk traverses nested directories (yields root, dirs, files):
# for root, dirs, files in os.walk("photo"):
#     for file in files:
#         X.append(os.path.join(root, file))

X = np.array(X)
Y = np.array(Y)

# Hold out 20% of the samples as the test set; random_state=1 fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=1)

print(len(X_train), len(X_test), len(y_train), len(y_test))

# ----------------------------------------------------------------------------------
# Step 2: read every image and convert it into a pixel-histogram feature vector
# ----------------------------------------------------------------------------------


def _histogram_features(paths):
    """Return one flattened 2-D colour-histogram feature vector per image.

    Each image is decoded as a 3-channel colour image, resized to 256x256 and
    summarised by a joint 256x256 histogram over channels 0 and 1 (blue and
    green in OpenCV's BGR order). The histogram is divided by 255 and
    flattened into a vector of 65536 values.

    Args:
        paths: iterable of image file paths.

    Returns:
        A list with one 1-D float array per path, in the same order.

    Raises:
        ValueError: if a file cannot be decoded as an image.
    """
    features = []
    for path in paths:
        # Use np.fromfile + cv2.imdecode instead of cv2.imread so that paths
        # containing non-ASCII (e.g. Chinese) characters can be read.
        image = cv2.imdecode(np.fromfile(path, dtype=np.uint8), cv2.IMREAD_COLOR)
        if image is None:
            # imdecode signals failure by returning None; fail here with the
            # offending path rather than with an opaque error inside resize.
            raise ValueError("cannot decode image file: %s" % path)

        # Bring every image to the same pixel size.
        img = cv2.resize(image, (256, 256), interpolation=cv2.INTER_CUBIC)

        # The upper bound of each range is exclusive, so it must be 256 for
        # pixels of value 255 to be counted (with 255 they were dropped and
        # the last bin stayed empty).
        hist = cv2.calcHist([img], [0, 1], None,
                            [256, 256], [0.0, 256.0, 0.0, 256.0])

        features.append((hist / 255).flatten())
    return features


# Training set
XX_train = _histogram_features(X_train)

# Test set
XX_test = _histogram_features(X_test)

# ----------------------------------------------------------------------------------
# Step 3: image classification with a support vector machine
# ----------------------------------------------------------------------------------
# 0.5  (author's note; presumably the score obtained with this classifier)
# Common kernels: 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'.
# Only one model is trained. The original also fitted a default-kernel SVC()
# first, but its result was overwritten immediately, so that fit was wasted.
# To try the default kernel instead, use: clf = SVC().fit(XX_train, y_train)
clf = SVC(kernel="linear").fit(XX_train, y_train)
predictions_labels = clf.predict(XX_test)

# ----------------------------------------------------------------------------------
# Step 3 (alternative): image classification with a decision tree
# ----------------------------------------------------------------------------------
# 0.36
# from sklearn.tree import DecisionTreeClassifier
# clf = DecisionTreeClassifier().fit(XX_train, y_train)
# predictions_labels = clf.predict(XX_test)

# ----------------------------------------------------------------------------------
# Step 3 (alternative): image classification with KNN
# ----------------------------------------------------------------------------------
# 0.11
# from sklearn.neighbors import KNeighborsClassifier
# clf = KNeighborsClassifier(n_neighbors=11).fit(XX_train, y_train)
# predictions_labels = clf.predict(XX_test)

# ----------------------------------------------------------------------------------
# Step 3 (alternative): image classification with naive Bayes
# ----------------------------------------------------------------------------------
# 0.01
# from sklearn.naive_bayes import BernoulliNB
# clf = BernoulliNB().fit(XX_train, y_train)
# predictions_labels = clf.predict(XX_test)

# Print the predicted labels under their heading, then the evaluation report
# comparing them with the true test labels.
print('预测结果:', predictions_labels, sep='\n')
print('算法评价:')
print(classification_report(y_test, predictions_labels))