目錄
如何制作數據集txt,並進行加載
1. 根據圖片制作train.txt val.txt 和 test.txt
# MakeTxt.py
"""
原始圖片保存路徑
**************文件保存方式 一 *******************
--data
--train
--category_1
--category_2
...
--val
--category_1
--category_2
...
--test
--category_1
--category_2
...
**************文件保存方式 二 *******************
--data
--category_1
--001.jpg
--002.jpg
...
--category_2
--001.jpg
--002.jpg
...
...
"""
import os
# 【1. 獲取文件絕對路徑】
def get_files_path(file_dir):
    """Return the full paths of every entry directly under *file_dir*.

    Each name from os.listdir is joined with *file_dir*; the resulting
    list is printed for quick inspection and then returned.
    """
    dir_names = [os.path.join(file_dir, entry) for entry in os.listdir(file_dir)]
    print(dir_names)
    return dir_names
def get_dir_img(file_dir):
    """Recursively collect the path of every file below *file_dir*."""
    filenames = []
    for root, _dirs, names in os.walk(file_dir):
        filenames.extend(os.path.join(root, name) for name in names)
    return filenames
def make_txt(img_root, txt_root, quantity_proportion):
    """Split the images under *img_root* into train/val/test index files.

    img_root            : directory holding one sub-directory per category
    txt_root            : directory where train.txt / val.txt / test.txt are written
    quantity_proportion : [train, val, test] ratios, e.g. [0.8, 0.1, 0.1]

    Each written line is "<image_path> <category_index>".  Category
    directories and images are sorted, so repeated runs produce identical
    files.  Fixes in this version: the three txt files are closed via
    "with" (they were left open before), and the split uses exact,
    non-overlapping slices (the original off-by-one gave train one extra
    image per category and could leave test empty).
    """
    txt_names = [txt_root + '/train.txt', txt_root + '/val.txt', txt_root + '/test.txt']
    # start from a clean slate so reruns do not append duplicate lines
    for path in txt_names:
        if os.path.isfile(path):
            os.remove(path)
    # sorted() makes the category -> label-index mapping deterministic
    categories = sorted(os.path.join(img_root, d) for d in os.listdir(img_root))
    with open(txt_names[0], 'a') as train, \
         open(txt_names[1], 'a') as val, \
         open(txt_names[2], 'a') as test:
        for idx, cat_dir in enumerate(categories):
            images = []
            for root, _dirs, files in os.walk(cat_dir):
                images.extend(os.path.join(root, f) for f in files)
            images.sort()
            n_train = int(quantity_proportion[0] * len(images))
            n_val = int(quantity_proportion[1] * len(images))
            splits = (images[:n_train],
                      images[n_train:n_train + n_val],
                      images[n_train + n_val:])
            for writer, chunk in zip((train, val, test), splits):
                for img in chunk:
                    writer.write(img + ' ' + str(idx) + '\n')
def main():
    """Entry point: build ./dataTxt/{train,val,test}.txt from ./pokeman."""
    split_ratio = [0.8, 0.1, 0.1]
    make_txt('./pokeman', './dataTxt', split_ratio)


if __name__ == '__main__':
    main()
2. torch.utils.data.Dataset
用來制作數據集
- Image.open(path).convert('RGB') 圖片轉換成RGB通道,對應后面的網絡輸入channel改為3
#***************************一些必要的包的調用********************************
import torch.nn.functional as F
import torch
import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision.models as models
from torchvision import transforms, utils
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
import torch.optim as optim
import os
#*************************** initialise a few runtime globals ********************************
gpu_id =0  # index of the CUDA device used below
torch.cuda.set_device(gpu_id)# pin all CUDA ops to this GPU (NOTE(review): raises if no CUDA device is present — confirm a GPU is required)
learning_rate = 0.0001 # learning rate (defined here but not read elsewhere in this listing — confirm still needed)
#*************************************數據集的設置****************************************************************************
#定義讀取文件的格式
def default_loader(path):
    """Open the image at *path* and force 3-channel RGB (the network's in_channels is 3)."""
    img = Image.open(path)
    return img.convert('RGB')
class MyDataset(Dataset):
    """Image dataset backed by a txt index file.

    Every non-empty line of *txt* must look like ``<image_path> <label>``
    (the format produced by MakeTxt.py).  Images are loaded lazily in
    ``__getitem__`` via *loader*.

    Args:
        txt: path to the index file (train.txt / val.txt / test.txt).
        transform: optional callable applied to the loaded image.
        target_transform: optional callable for the label (stored for API
            compatibility; ``__getitem__`` does not apply it — confirm
            whether that is intended).
        loader: callable mapping a path to an image.  Defaults to
            ``default_loader`` (PIL, forced to RGB); the default is
            resolved lazily inside ``__init__`` so a custom loader works
            even where PIL is unavailable — behaviour is unchanged for
            existing callers.
    """

    def __init__(self, txt, transform=None, target_transform=None, loader=None):
        imgs = []
        # "with" guarantees the index file is closed even if a line is
        # malformed (the original left the handle open forever).
        with open(txt, 'r') as fh:
            for line in fh:
                # split() strips the trailing newline and splits on any
                # whitespace, so the old strip('\n')/rstrip('\n') pair is
                # unnecessary.
                words = line.split()
                if not words:
                    continue  # tolerate blank lines instead of crashing
                # words[0] is the image path, words[1] the integer label
                imgs.append((words[0], int(words[1])))
        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform
        self.loader = default_loader if loader is None else loader

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at *index*."""
        fn, label = self.imgs[index]
        img = self.loader(fn)  # load the image from its path
        if self.transform is not None:
            img = self.transform(img)  # e.g. crop + ToTensor
        return img, label

    def __len__(self):
        """Number of samples listed in the index file (not batches)."""
        return len(self.imgs)
def my_data_set(txt_root = './dataTxt/',batchsize=32,resize=227):
    """Build the train/test/val DataLoaders from the txt index files.

    txt_root  : directory containing train.txt / val.txt / test.txt
    batchsize : mini-batch size shared by all three loaders
    resize    : square crop size fed to RandomResizedCrop
    Returns (train_loader, test_loader, val_loader).
    """
    # identical preprocessing for every split: random crop + tensor conversion
    preprocess = transforms.Compose([
        transforms.RandomResizedCrop((resize, resize)),
        transforms.ToTensor(),
    ])
    train_data = MyDataset(txt=txt_root + 'train.txt', transform=preprocess)
    test_data = MyDataset(txt=txt_root + 'test.txt', transform=preprocess)
    val_data = MyDataset(txt=txt_root + 'val.txt', transform=preprocess)
    # only the training loader shuffles; evaluation order stays fixed
    train_loader = DataLoader(dataset=train_data, batch_size=batchsize, shuffle=True, num_workers=1)
    test_loader = DataLoader(dataset=test_data, batch_size=batchsize, shuffle=False, num_workers=1)
    val_loader = DataLoader(dataset=val_data, batch_size=batchsize, shuffle=False, num_workers=1)
    for tag, data in (('num_of_trainData:', train_data),
                      ('num_of_testData:', test_data),
                      ('num_of_valData:', val_data)):
        print(tag, len(data))
    return train_loader, test_loader, val_loader
3. 搭建RestNet18進行測試
import torch
import time
from torch import nn,optim
import torch.nn.functional as F
from PIL import Image
from torchvision import transforms
import MyDataset as md
import sys
sys.path.append("..")
from Pytorch_learning import pytorch_deep as pyd
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # prefer GPU, fall back to CPU
# Residual block — the basic building unit of ResNet-18
class Residual(nn.Module):
    """ResNet basic residual block: two 3x3 convs with BN and a skip path.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels produced by the block.
        use_1x1conv: when True, project the identity branch with a 1x1
            conv so its shape matches the main branch (needed whenever
            channels or spatial size change).
        stride: stride of the first conv (and of the 1x1 projection).
    """

    def __init__(self, in_channels, out_channels, use_1x1conv=False, stride=1):
        super(Residual, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        if use_1x1conv:
            # BUG FIX: the shortcut projection must be a 1x1 conv.  The
            # original used kernel_size=3, padding=1 here — a full 3x3 conv
            # despite the parameter name.  Output shape is identical
            # (floor((H-1)/s)+1 either way), but parameter count and
            # semantics now match the standard ResNet shortcut.
            self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, X):
        """Return relu(main_branch(X) + shortcut(X))."""
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)  # match shape of the main branch
        return F.relu(X + Y)
def resnet_block(in_channels, out_channels, num_residuals,first_block=False):
    """Stack *num_residuals* Residual units into one nn.Sequential stage.

    In a non-first stage the leading unit halves the spatial size
    (stride 2) and switches the channel count via a 1x1 projection; every
    other unit preserves shape.  The first stage must receive input whose
    channel count already equals *out_channels*.
    """
    if first_block:
        assert in_channels == out_channels  # stage 1 keeps the stem's channel count
    units = []
    for idx in range(num_residuals):
        if idx or first_block:
            units.append(Residual(out_channels, out_channels))
        else:
            units.append(Residual(in_channels, out_channels, use_1x1conv=True, stride=2))
    return nn.Sequential(*units)
def my_restnet_18(num_classes= 6):
    """Assemble a ResNet-18-style classifier with *num_classes* outputs.

    Stem (7x7 conv + BN + ReLU + max-pool), four residual stages, global
    average pooling, then a single fully-connected layer.
    """
    net = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    stages = (
        ("resnet_block1", resnet_block(64, 64, 2, first_block=True)),
        ("resnet_block2", resnet_block(64, 128, 2)),
        ("resnet_block3", resnet_block(128, 256, 2)),
        ("resnet_block4", resnet_block(256, 512, 2)),
    )
    for tag, stage in stages:
        net.add_module(tag, stage)
    # GlobalAvgPool2d squeezes the map to (batch, 512, 1, 1); FlattenLayer
    # then feeds the 512-vector to the linear classifier head.
    net.add_module("global_avg_pool", pyd.GlobalAvgPool2d())
    net.add_module("fc", nn.Sequential(pyd.FlattenLayer(), nn.Linear(512, num_classes)))
    return net
def my_train(save_path = './weight/restNet18_3.pth',resize=96,batch_size = 32):
    """Train the ResNet-18 model on the txt-indexed dataset and print metrics.

    Args:
        save_path: where the trained net would be saved (the save call is
            currently commented out, as in the original).
        resize: input crop size fed to the data pipeline; reduce it or
            batch_size on CUDA out-of-memory errors.
        batch_size: mini-batch size for all three loaders.
    """
    net = my_restnet_18(num_classes= 6)
    train_iter, test_iter, val_iter = md.my_data_set(batchsize=batch_size, resize=resize)
    lr, num_epochs = 0.001, 10
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    net = net.to(device)
    print("training on ", device)
    # cross-entropy loss expects raw logits from the network
    loss = torch.nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        # BUG FIX: batch_count now resets every epoch.  The original
        # accumulated it across epochs while train_l_sum was reset, so the
        # reported average loss was divided by an ever-growing count and
        # shrank artificially from epoch 2 onward.
        train_l_sum, train_acc_sum, n, batch_count = 0.0, 0.0, 0, 0
        start = time.time()
        for X, y in train_iter:
            X = X.to(device)  # move the batch to the training device
            y = y.to(device)
            y_hat = net(X)  # forward pass
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = pyd.evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f,time %.1f sec'%
              (epoch + 1, train_l_sum / batch_count,train_acc_sum / n, test_acc, time.time() - start))
    # torch.save(net,save_path)
def my_test(pth_path = './weight/restNet18.pth',resize = 96,batch_size = 32):
    """Evaluate a saved model on the validation split and print accuracy.

    Loads the whole pickled network from *pth_path* (NOTE: torch.load
    unpickles arbitrary code — only load trusted files) and runs it over
    val_iter, printing per-sample actual/predicted labels and, finally,
    the overall accuracy.
    """
    model_net = torch.load(pth_path)
    train_iter, test_iter,val_iter =md.my_data_set(batchsize=batch_size,resize=resize)
    # prediction counters for the accuracy summary
    correct = 0
    total = 0
    # no_grad: pure inference, skip autograd bookkeeping to save memory
    with torch.no_grad():
        for images,labels in val_iter:
            print(len(images.data))
            images, labels = images.to(device), labels.to(device)
            outputs = model_net(images)
            # the class with the highest logit is the prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            for i in range(len(images.data)):
                print('實際標籤 {},預測標籤 {}'.format(labels[i],predicted[i]))
    # BUG FIX: correct/total were accumulated but never reported; print the
    # overall validation accuracy they were clearly meant for.
    if total:
        print('val accuracy: {:.3f} ({}/{})'.format(correct / total, correct, total))
def test_one_img(img_path, pth_path = './weight/restNet18_2.pth',resize = 96):
    """Classify a single image file with a saved model and print the class id.

    Args:
        img_path: path of the image to classify.
        pth_path: pickled network produced by training (NOTE: torch.load
            unpickles arbitrary code — only load trusted files).
        resize: crop size; should match the size used during training.
    """
    # BUG FIX: convert to 'RGB' (3 channels), not 'RGBA' (4 channels).
    # The network's first conv expects in_channels=3 and the training
    # loader (default_loader) also converts to RGB, so a 4-channel tensor
    # would mismatch the trained weights.
    img = Image.open(img_path).convert('RGB')
    test_transforms = transforms.Compose([
        transforms.RandomResizedCrop((resize,resize)),
        transforms.ToTensor(),])
    img = test_transforms(img)
    img = torch.unsqueeze(img, 0)  # add the batch dimension: (1, 3, H, W)
    model_net = torch.load(pth_path)
    with torch.no_grad():
        img = img.to(device)
        outputs = model_net(img)
        print(outputs)
        # the class with the highest logit is the prediction
        _, predicted = torch.max(outputs.data, 1)
        print('predicted = {}'.format(predicted.item()))
def main():
    """Train (and optionally evaluate) the ResNet-18 classifier."""
    save_path = './weight/restNet18_3.pth'
    pth_path = save_path
    resize = 227
    batch_size = 32
    # training run
    my_train(save_path, resize, batch_size)
    # evaluate on the validation split:
    # my_test(pth_path,resize,batch_size)
    # classify a single image:
    # img_path = './pokeman/mewtwo/00000036.jpg'
    # test_one_img(img_path,pth_path,resize)


if __name__ == '__main__':
    main()
訓練結果
- 訓練結果不太理想,主要數據集圖片中有些多目標,不利於分類,另外數據量較少
pytorch_deep.py文件下載
- 程序中有相關調用,已上傳,需要的可以下載
https://download.csdn.net/download/wangxiaobei2017/14031906