为了完成人工智能课的大作业,写了这个程序,代码比较简陋(
使用工具
本项目使用 Anaconda 搭建虚拟环境,所用 Python 版本为 3.10。机器学习框架选择 PyTorch(CUDA 11.7 版),训练显卡为 NVIDIA RTX 2060,显存为 6 GB。展示环节使用了第三方库 Gradio。
效果展示
详细内容及代码
项目地址:https://github.com/Astrophel02/ShipDetect
使用了https://opensar.sjtu.edu.cn 的opensarship-1 数据集,对数据集进行预处理使之大小适合网络。
处理后的数据集在这里:OneDrive
模型选用简单的AlexNet,最终识别准确率可以达到97%以上。
代码中,CNN_1是训练过程,CNN_2是展示过程,针对实验用数据集的预训练模型在这里:OneDrive,该模型可以直接在CNN_2.py中加载。全部代码如下:
CNN_1:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np
# ---- Hyperparameters ----
BATCH_SIZE = 32
EPOCHS = 20
# Prefer the GPU, but fall back to the CPU so the script still starts on a
# machine without CUDA instead of failing on the first .to(DEVICE).
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Training pipeline: random horizontal flip as light augmentation, then
# conversion to a tensor and per-channel normalisation with mean 0.5 / std 0.5.
# It is bound to its own name so the imported ``transforms`` module is not
# overwritten and stays usable further down the script.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])
# Evaluation pipeline: identical except that the random flip is left out, so
# repeated evaluations of the same model see exactly the same inputs.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])

train_path = "D:\\ship\\train"  # change to your dataset location
train_dataset = datasets.ImageFolder(train_path, transform=train_transform)
test_path = "D:\\ship\\test"  # change to your dataset location
test_dataset = datasets.ImageFolder(test_path, transform=test_transform)

# Batches are produced by 6 worker processes; only the training data is shuffled.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=6)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=6)
# Network definition
class AlexNet(nn.Module):
    """AlexNet-style CNN mapping a batch of 3-channel images to 2 class logits.

    ``conv`` is the convolutional feature extractor and ``fc`` the classifier
    head. ``fc`` expects ``256 * 5 * 5`` flattened features per sample, so the
    input resolution must be one for which ``conv`` ends in a 5x5 map
    (224x224 inputs satisfy this).
    """

    def __init__(self):
        super().__init__()

        feature_layers = [
            # Large 11x11 window with stride 4; no padding is applied here.
            nn.Conv2d(3, 96, kernel_size=11, stride=4),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Smaller window; padding 2 keeps height/width unchanged while
            # the number of output channels grows.
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Three consecutive 3x3 convolutions; all but the last one
            # increase the channel count further.
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.conv = nn.Sequential(*feature_layers)

        classifier_layers = [
            nn.Linear(256 * 5 * 5, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            # Two output units: one logit per class.
            nn.Linear(4096, 2),
        ]
        self.fc = nn.Sequential(*classifier_layers)

    def forward(self, img):
        """Return raw (un-normalised) class logits of shape (batch, 2)."""
        features = self.conv(img)
        # Flatten everything except the batch axis before the dense layers.
        flattened = features.view(img.shape[0], -1)
        return self.fc(flattened)
# Build the network and move its parameters to the selected device.
model = AlexNet().to(DEVICE)

# Per-class loss weights: errors on class index 1 count three times as much
# as errors on class index 0, which counteracts the unbalanced class sizes.
weights = [1.0, 3]
class_weights = torch.tensor(weights, dtype=torch.float32).to(DEVICE)
criterion = nn.CrossEntropyLoss(weight=class_weights)

# Adam with a small fixed learning rate.
optimizer = optim.Adam(model.parameters(), lr=5e-5)
def train(model, device, train_loader, optimizer, epoch, loss_fn=None):
    """Run one training epoch and print the current loss every 30 batches.

    Args:
        model: Network to optimise; it is switched to training mode.
        device: Device each batch is moved to before the forward pass.
        train_loader: DataLoader yielding ``(data, target)`` batches; its
            ``dataset`` length is used for the progress message.
        optimizer: Optimiser that updates the parameters of ``model``.
        epoch: Epoch index, used only in the progress message.
        loss_fn: Callable ``loss_fn(output, target)`` returning a scalar
            loss. When omitted, the module-level ``criterion`` is used.
    """
    if loss_fn is None:
        # Backward-compatible default: the loss defined at module level.
        loss_fn = criterion

    model.train()
    total_samples = len(train_loader.dataset)
    total_batches = len(train_loader)
    samples_seen = 0
    for batches_done, (data, target) in enumerate(train_loader, start=1):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()

        # Count what has actually been processed so the progress line is
        # neither one batch behind nor distorted by a shorter final batch.
        samples_seen += len(data)
        if batches_done % 30 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, samples_seen, total_samples,
                100. * batches_done / total_batches, loss.item()))
def test(model,device,test_loader):
model.eval()
test_loss=0
correct=0
with torch.no_grad():
for data,target in test_loader:
data,target=data.to(device),target.to(device)
output=model(data)
test_loss += F.nll_loss(output, target, reduction='sum').item()
pred = output.max(1, keepdim=True)[1]
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
MODEL_PATH = "./Model.pth"

# The loaders use worker processes. Where workers are started with the
# "spawn" method (the default on Windows) every worker re-imports this
# script, so the loop must sit behind the main guard; otherwise each worker
# would try to start training itself and DataLoader iteration fails.
if __name__ == "__main__":
    for epoch in range(EPOCHS):
        train(model, DEVICE, train_loader, optimizer, epoch)
        test(model, DEVICE, test_loader)

    # Save the model. The whole module object is pickled (not only its
    # state_dict), so loading it later needs the AlexNet class to be defined.
    torch.save(model, MODEL_PATH)
CNN_2:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import gradio
class AlexNet(nn.Module):
    """AlexNet-style CNN mapping a batch of 3-channel images to 2 class logits.

    ``conv`` is the convolutional feature extractor and ``fc`` the classifier
    head. ``fc`` expects ``256 * 5 * 5`` flattened features per sample, so the
    input resolution must be one for which ``conv`` ends in a 5x5 map
    (224x224 inputs satisfy this).
    """

    def __init__(self):
        super().__init__()

        feature_layers = [
            # Large 11x11 window with stride 4; no padding is applied here.
            nn.Conv2d(3, 96, kernel_size=11, stride=4),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Smaller window; padding 2 keeps height/width unchanged while
            # the number of output channels grows.
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Three consecutive 3x3 convolutions; all but the last one
            # increase the channel count further.
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.conv = nn.Sequential(*feature_layers)

        classifier_layers = [
            nn.Linear(256 * 5 * 5, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            # Two output units: one logit per class.
            nn.Linear(4096, 2),
        ]
        self.fc = nn.Sequential(*classifier_layers)

    def forward(self, img):
        """Return raw (un-normalised) class logits of shape (batch, 2)."""
        features = self.conv(img)
        # Flatten everything except the batch axis before the dense layers.
        flattened = features.view(img.shape[0], -1)
        return self.fc(flattened)
# Load the trained model
# Prefer the GPU, but fall back to the CPU so the demo also runs without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model.pth holds a fully pickled nn.Module rather than a state_dict, hence:
#  * weights_only=False is stated explicitly, because a weights-only load
#    cannot restore an arbitrary module object;
#  * map_location remaps tensors saved on a GPU onto the device chosen above.
# NOTE(security): unpickling can execute arbitrary code, so only load model
# files from a source you trust.
model = torch.load("./Model.pth", map_location=device, weights_only=False)
model.to(device)
# Inference only: switch dropout off so predictions are deterministic.
model.eval()

# Evaluation pipeline: tensor conversion plus the mean 0.5 / std 0.5
# normalisation. No random flip here, so repeated evaluations are reproducible.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])
test_path = "D:\\ship\\test"
test_dataset = datasets.ImageFolder(test_path, transform=transform)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=32, shuffle=False, num_workers=6)
def test(model,device,test_loader):
model.eval()
test_loss=0
correct=0
with torch.no_grad():
for data,target in test_loader:
data,target=data.to(device),target.to(device)
output=model(data)
test_loss += F.nll_loss(output, target, reduction='sum').item()
pred = output.max(1, keepdim=True)[1]
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
# Report accuracy on the test set once at start-up. The call is guarded so
# that DataLoader worker processes, which re-import this script when they are
# started with the "spawn" method (the default on Windows), do not re-run the
# evaluation themselves.
if __name__ == "__main__":
    test(model, device, test_loader)

# Visualisation: display names for the two output classes, indexed by class id.
labels=["未检测到船只","检测到船只"]
def predict(inp):
    """Classify one image and return a confidence per label.

    Args:
        inp: Image handed over by the ``gradio.Image`` input; it is passed
            directly to ``transforms.ToTensor``. ``None`` is accepted and
            means "no image".

    Returns:
        Dict mapping each entry of ``labels`` to its softmax probability, or
        ``None`` when ``inp`` is ``None``.
    """
    # gradio may invoke the callback without an image (presumably when the
    # input is cleared in live mode); return nothing instead of raising
    # inside ToTensor.
    if inp is None:
        return None

    # Make sure dropout is disabled regardless of what ran before this call.
    model.eval()

    # Same preprocessing as the evaluation pipeline: tensor conversion and
    # per-channel normalisation, then a leading batch axis.
    img = transforms.ToTensor()(inp)
    img = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])(img)
    batch = img.unsqueeze(0).to(device)

    with torch.no_grad():
        # [0] selects the single sample; softmax turns logits into probabilities.
        prediction = torch.nn.functional.softmax(model(batch)[0], dim=0)
    return {label: float(prob) for label, prob in zip(labels, prediction)}
def test(inp):
    """Debug helper: return the tensor shape of ``inp`` with a batch axis added.

    ``inp`` is converted with ``transforms.ToTensor`` and given a leading
    axis of size 1; only the resulting shape is returned.

    NOTE: this definition rebinds the name ``test``, which this script also
    uses for the evaluation routine defined earlier.
    """
    batched = transforms.ToTensor()(inp).unsqueeze(0)
    return batched.shape
# Web UI: an image input wired to predict(), results rendered as a label
# widget; live=True re-runs the prediction whenever the input changes.
inp = gradio.Image()
io = gradio.Interface(fn=predict, inputs=inp, outputs="label", live=True)

# Start the server only when run as a script, so that importing this module
# (including the re-import done by spawned DataLoader workers) does not
# launch a web server.
if __name__ == "__main__":
    io.launch()
遇到的坑
1)模型选择不恰当。开始选择了VGG-16作为模型,结果无法拟合;
2)结果展示过程忘记对输入图片进行预处理,导致结果不准确;
3)数据集正负样本数量不均衡,导致模型更倾向于预测无船只,解决方法是为损失函数增加类别权重。
Comments | NOTHING