SVM 支持向量机简介+SVM15种场景分类实例( 二 )


import os  # needed only by this script section, for portable path handling

# The dataset root holds one sub-folder per scene category; each sub-folder
# name is used as the class name. os.path.basename replaces the original
# fixed-offset slice plus split('\\'), which only worked for this exact path
# on Windows. Plain files are ignored and the names are sorted so that the
# label ids assigned below are the same on every run.
class_names = sorted(
    os.path.basename(os.path.normpath(entry))
    for entry in glob.glob('D:/Computer_Vision/scene15/*')
    if os.path.isdir(entry)
)
# Label-encode the categories: integer id -> class name.
class_names = dict(enumerate(class_names))
print(class_names)
def load_dataset(path, num_per_class=-1):
    """Load the images of every class folder found under ``path``.

    Iterates over the module-level ``class_names`` mapping (label id ->
    folder name), reads each ``*.jpg`` file with ``cv2.imread`` and passes
    it through ``cv2.pyrDown``.

    Args:
        path: Dataset root containing one sub-folder per class. A trailing
            separator is optional.
        num_per_class: Maximum number of images read per class. Any value
            <= 0 (the default) reads every image.

    Returns:
        A ``(data, labels)`` tuple of two equal-length lists: the processed
        images and the integer label id of each image.

    Raises:
        IOError: If an image file cannot be decoded.
    """
    import os  # local import: the file-level import block is not visible here

    data = []
    labels = []
    for label_id, class_name in class_names.items():
        # Sorted so that num_per_class always selects the same files.
        image_paths = sorted(glob.glob(os.path.join(path, class_name, '*.jpg')))
        if num_per_class > 0:
            image_paths = image_paths[:num_per_class]
        for filename in image_paths:
            image = cv2.imread(filename)
            if image is None:
                # cv2.imread reports failure by returning None instead of
                # raising; fail here so the offending file is named.
                raise IOError('Cannot read image: %s' % filename)
            # NOTE(review): the second positional argument of cv2.pyrDown is
            # `dst`, not a flag; the 0 is kept exactly as in the original.
            # Confirm whether a grayscale read, cv2.imread(filename, 0), was
            # what the author intended.
            data.append(cv2.pyrDown(image, 0))
            # Append the label together with the image so both lists stay
            # aligned.
            labels.append(label_id)
    return data, labels
# Build the image list and the matching label list from the dataset folder.
X, y = load_dataset('D:/Computer_Vision/scene15/')
X_num = len(X)

# Shuffled 80/20 train/test split with a fixed seed. Passing stratify=y keeps
# the per-class sample distribution of the full dataset in both parts.
train_data, test_data, train_label, test_label = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=111,
    stratify=y,
)
def computeSIFT(data, step_size=15):
    """Compute dense SIFT descriptors on a regular grid for each image.

    Args:
        data: Sequence of images (arrays whose first two dimensions are
            rows and columns).
        step_size: Grid spacing in pixels; also used as the keypoint size.
            Defaults to 15, the value previously hard-coded.

    Returns:
        A list with one entry per image: the descriptor array returned by
        ``sift.compute`` for that image's grid keypoints.
    """
    if len(data) == 0:
        return []

    # Build the extractor once instead of once per image. Newer OpenCV
    # releases expose SIFT in the main module; fall back to the contrib
    # location used by the original code when it is not there.
    create_sift = getattr(cv2, 'SIFT_create', None)
    if create_sift is None:
        create_sift = cv2.xfeatures2d.SIFT_create
    sift = create_sift()

    descriptors = []
    for img in data:
        height, width = img.shape[:2]
        # cv2.KeyPoint takes (x, y) = (column, row). The original iterated x
        # over shape[0] (rows) and y over shape[1] (columns), which placed
        # the grid incorrectly on non-square images.
        keypoints = [
            cv2.KeyPoint(float(col), float(row), float(step_size))
            for row in range(0, height, step_size)
            for col in range(0, width, step_size)
        ]
        # compute() returns (keypoints, descriptors); only the descriptors
        # are kept.
        _, image_desc = sift.compute(img, keypoints)
        descriptors.append(image_desc)
    return descriptors
# Dense SIFT descriptors for the training and the test images.
x_train = computeSIFT(train_data)
x_test = computeSIFT(test_data)

# Stack the per-image descriptor matrices into a single 2-D array (one
# descriptor per row). This replaces the original per-row Python append loop
# with one vectorized call.
all_train_desc = np.vstack(x_train)
def clusterFeatures(all_train_desc, k):
    """Fit a k-means model on the training descriptors.

    Args:
        all_train_desc: 2-D array of descriptors, one per row.
        k: Number of cluster centres, i.e. the number of visual words.

    Returns:
        The fitted ``KMeans`` model.
    """
    # The n_jobs argument was deprecated in scikit-learn 0.23 and removed in
    # 1.0, where passing it raises TypeError, so it is no longer supplied.
    # n_init=10 pins the long-standing default explicitly, because newer
    # releases default to 'auto' and would otherwise change the result.
    return KMeans(n_clusters=k, random_state=0, n_init=10).fit(all_train_desc)
def formTrainingSetHistogram(x_train, kmeans, k):
    """Build one visual-word histogram per image.

    Args:
        x_train: Iterable of per-image descriptor arrays (one descriptor
            per row).
        kmeans: Fitted clustering model exposing ``predict``, which maps
            each descriptor to a cluster index.
        k: Number of clusters; every histogram has at least ``k`` bins.

    Returns:
        A NumPy array with one histogram row per image, each row counting
        how many of the image's descriptors fell into each cluster.
    """
    histograms = []
    for descriptors in x_train:
        # The original deep-copied the descriptors before predicting. That
        # copy is dropped on the assumption that predict() does not modify
        # its input (scikit-learn estimators do not).
        words = kmeans.predict(descriptors)
        # bincount already yields a flat vector, so the original
        # reshape(1, -1).ravel() round-trip was a no-op and is removed.
        histograms.append(np.bincount(words, minlength=k))
    return np.array(histograms)
# Vocabulary size (number of visual words) and the k-means model fitted on
# the training descriptors.
k = 50
kmeans = clusterFeatures(all_train_desc, k)

# Bag-of-words histograms for the training set and the test set.
train_hist, test_hist = (
    formTrainingSetHistogram(descs, kmeans, k) for descs in (x_train, x_test)
)

# Standardize both sets with a scaler fitted on the training histograms.
scaler = preprocessing.StandardScaler().fit(train_hist)
train_hist, test_hist = scaler.transform(train_hist), scaler.transform(test_hist)

# Train a linear support vector machine and predict the test labels.
svm = sklearn.svm.SVC(
    kernel='linear',
    class_weight='balanced',
    probability=True,
)
svm.fit(train_hist, train_label)
predict = svm.predict(test_hist)

# Report accuracy, the per-class metrics and the confusion matrix on the
# test set.
print('准确率是:%s' % (accuracy_score(test_label, predict)))
print(classification_report(test_label, predict))
print(confusion_matrix(test_label, predict))