# Convert a folder of JPEG images into pickled training/test batches of flattened 32x32 RGB pixel rows.
from PIL import Image
import numpy as np
import pickle,glob,os
# Script configuration.
SOURCE_GLOB = 'D:/py/pics/trees/*.jpg'  # input images
IMAGE_SIDE = 32                         # every image is resized to IMAGE_SIDE x IMAGE_SIDE
TRAIN_SIZE = 2000                       # the first TRAIN_SIZE shuffled rows form the training batch
TRAIN_PATH = './data_batch_8'
TEST_PATH = './test_batch_1'
# The Alibaba Cloud platform runs Python 2.7 while this script was written on
# Python 3.6, so the pickles are written with the older protocol 2.
PICKLE_PROTOCOL = 2


def image_to_row(img, side=IMAGE_SIDE):
    """Flatten a PIL image into a single 1-D pixel row.

    The image is converted to RGB if necessary, resized to ``side`` x ``side``
    and returned as one vector laid out as all red values, then all green
    values, then all blue values (``3 * side * side`` entries).

    Args:
        img: an opened ``PIL.Image.Image``.
        side: edge length, in pixels, of the resized square image.

    Returns:
        A 1-D ``numpy.ndarray`` holding the three colour planes back to back.
    """
    # Convert every non-RGB image (greyscale, palette, RGBA, ...) to RGB so
    # that split() always yields exactly three bands.
    if img.mode != 'RGB':
        img = img.convert('RGB')
    # LANCZOS is the anti-aliasing filter that used to be spelled
    # Image.ANTIALIAS (that alias was removed in Pillow 10); it keeps the
    # distortion introduced by downscaling as low as possible.
    img = img.resize((side, side), Image.LANCZOS)
    return np.concatenate([np.array(band).reshape(-1) for band in img.split()])


def load_rows(pattern=SOURCE_GLOB):
    """Load every image matching ``pattern`` and return a list of pixel rows.

    For each file the image mode, the file name without extension and the
    original image size are printed to help locate problem files.

    Args:
        pattern: glob pattern selecting the input images.

    Returns:
        A list with one flattened row (see ``image_to_row``) per image; empty
        when nothing matches ``pattern``.
    """
    rows = []
    for count, infile in enumerate(glob.glob(pattern), start=1):
        # Split the path into file name and extension.
        name, _ext = os.path.splitext(infile)
        with Image.open(infile) as img:
            # Image mode and file name (used to locate errors while debugging).
            print(img.mode, name)
            width, height = img.size
            print('{} imagesize is:{} X {}'.format(count, width, height))
            rows.append(image_to_row(img))
    return rows


def build_batches(rows, train_size=TRAIN_SIZE):
    """Stack ``rows`` into a matrix, shuffle it and split it into two batches.

    Args:
        rows: non-empty sequence of equal-length 1-D pixel rows.
        train_size: number of shuffled rows that go into the training batch;
            the remainder forms the test batch.

    Returns:
        A ``(train_dic, test_dic)`` tuple of dicts, each with a ``'data'``
        matrix (one row per image) and a ``'labels'`` vector of zeros of
        matching length.

    Raises:
        ValueError: if ``rows`` is empty (raised by ``numpy.stack``).
    """
    # Stack once at the end instead of concatenating inside the loading loop.
    data = np.stack(rows)
    # Shuffle the row order. np.random.shuffle works in place and returns
    # None, so its result must not be assigned back to ``data``.
    np.random.shuffle(data)
    # Generate the labels: every image in this folder gets label 0.
    labels = np.zeros((data.shape[0],))
    # Split into the training set and the test set.
    train_dic = {'data': data[:train_size], 'labels': labels[:train_size]}
    test_dic = {'data': data[train_size:], 'labels': labels[train_size:]}
    return train_dic, test_dic


def main():
    """Convert the images under SOURCE_GLOB into two pickled batch files.

    Raises:
        SystemExit: if no image matches SOURCE_GLOB.
    """
    rows = load_rows()
    if not rows:
        raise SystemExit('no images matched {}'.format(SOURCE_GLOB))
    train_dic, test_dic = build_batches(rows)
    # Binary write mode creates the file when it does not exist yet; the
    # with-blocks guarantee both files are flushed and closed.
    with open(TRAIN_PATH, 'wb') as train_file:
        pickle.dump(train_dic, train_file, protocol=PICKLE_PROTOCOL)
    with open(TEST_PATH, 'wb') as test_file:
        pickle.dump(test_dic, test_file, protocol=PICKLE_PROTOCOL)


if __name__ == '__main__':
    main()