commit 56c2884ec8: first commit
@@ -0,0 +1,68 @@
import torch
import numpy as np

from models.experimental import attempt_load
from utils.general import non_max_suppression, scale_coords, letterbox
from utils.torch_utils import select_device
from utils.BaseDetector import baseDet


class Detector(baseDet):

    def __init__(self):
        super(Detector, self).__init__()
        self.init_model()
        self.build_config()

    def init_model(self):
        self.weights = 'weights/yolov5m.pt'
        self.device = '0' if torch.cuda.is_available() else 'cpu'
        self.device = select_device(self.device)
        model = attempt_load(self.weights, map_location=self.device)
        model.to(self.device).eval()
        model.half()
        # torch.save(model, 'test.pt')
        self.m = model
        self.names = model.module.names if hasattr(
            model, 'module') else model.names

    def preprocess(self, img):
        img0 = img.copy()
        img = letterbox(img, new_shape=self.img_size)[0]
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(self.device)
        img = img.half()  # half precision
        img /= 255.0      # normalize pixel values to [0, 1]
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        return img0, img

    def detect(self, im):
        im0, img = self.preprocess(im)

        pred = self.m(img, augment=False)[0]
        pred = pred.float()
        pred = non_max_suppression(pred, self.threshold, 0.4)

        pred_boxes = []
        for det in pred:
            if det is not None and len(det):
                det[:, :4] = scale_coords(
                    img.shape[2:], det[:, :4], im0.shape).round()

                for *x, conf, cls_id in det:
                    lbl = self.names[int(cls_id)]
                    if lbl not in ['person', 'car', 'truck']:
                        continue
                    x1, y1 = int(x[0]), int(x[1])
                    x2, y2 = int(x[2]), int(x[3])
                    pred_boxes.append(
                        (x1, y1, x2, y2, lbl, conf))

        return im, pred_boxes
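A minimal usage sketch for the detector above (not part of the commit; it assumes the YOLOv5 repo layout shipped with this project, a hypothetical image file, and that `baseDet.build_config()` sets `self.img_size` and `self.threshold`):

import cv2

det = Detector()                    # loads weights/yolov5m.pt in half precision
frame = cv2.imread('street.jpg')    # any BGR image (file name is an assumption)
_, boxes = det.detect(frame)
for x1, y1, x2, y2, lbl, conf in boxes:
    print(lbl, float(conf), (x1, y1, x2, y2))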
@@ -0,0 +1,10 @@
DEEPSORT:
  REID_CKPT: "deep_sort/deep_sort/deep/checkpoint/ckpt.t7"
  MAX_DIST: 0.2
  MIN_CONFIDENCE: 0.3
  NMS_MAX_OVERLAP: 0.5
  MAX_IOU_DISTANCE: 0.7
  MAX_AGE: 70
  N_INIT: 3
  NN_BUDGET: 100
@@ -0,0 +1,3 @@
# Deep Sort

This is the implementation of Deep SORT with PyTorch.
@@ -0,0 +1,21 @@
from .deep_sort import DeepSort


__all__ = ['DeepSort', 'build_tracker']


def build_tracker(cfg, use_cuda):
    return DeepSort(cfg.DEEPSORT.REID_CKPT,
                    max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                    nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                    max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=use_cuda)
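A hedged sketch of wiring the YAML above into `build_tracker` (easydict and the config path are assumptions; the repo may ship its own attribute-style parser):

import yaml
from easydict import EasyDict  # assumption: any attribute-style dict works here

with open('deep_sort/configs/deep_sort.yaml') as f:   # hypothetical path
    cfg = EasyDict(yaml.safe_load(f))

tracker = build_tracker(cfg, use_cuda=True)
print(cfg.DEEPSORT.MAX_DIST)   # 0.2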
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,15 @@
import torch

features = torch.load("features.pth")
qf = features["qf"]
ql = features["ql"]
gf = features["gf"]
gl = features["gl"]

scores = qf.mm(gf.t())
res = scores.topk(5, dim=1)[1][:, 0]
top1correct = gl[res].eq(ql).sum().item()

print("Acc top1:{:.3f}".format(top1correct / ql.size(0)))
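To make the indexing explicit, here is a toy version of the top-1 check above with hand-built features (illustrative values only): each query feature is scored against every gallery feature, and `topk(...)[1][:, 0]` is the index of its best-matching gallery entry.

import torch

qf = torch.eye(3)                       # 3 unit query features
gf = torch.tensor([[0., 1., 0.],        # gallery features
                   [1., 0., 0.],
                   [0., 0., 1.]])
gl = torch.tensor([1, 0, 2])            # gallery identities
ql = torch.tensor([0, 1, 2])            # query identities
res = qf.mm(gf.t()).topk(2, dim=1)[1][:, 0]   # best gallery index per query
print(gl[res].eq(ql).sum().item())            # 3: every query retrieved correctly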
@@ -0,0 +1,55 @@
import torch
import torchvision.transforms as transforms
import numpy as np
import cv2
import logging

from .model import Net


class Extractor(object):
    def __init__(self, model_path, use_cuda=True):
        self.net = Net(reid=True)
        self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
        state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)['net_dict']
        self.net.load_state_dict(state_dict)
        logger = logging.getLogger("root.tracker")
        logger.info("Loading weights from {}... Done!".format(model_path))
        self.net.to(self.device)
        self.size = (64, 128)
        self.norm = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])

    def _preprocess(self, im_crops):
        """
        1. to float with scale from 0 to 1
        2. resize to (64, 128) as the Market-1501 dataset did
        3. concatenate to a numpy array
        4. to torch Tensor
        5. normalize
        """
        def _resize(im, size):
            return cv2.resize(im.astype(np.float32) / 255., size)

        im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops], dim=0).float()
        return im_batch

    def __call__(self, im_crops):
        im_batch = self._preprocess(im_crops)
        with torch.no_grad():
            im_batch = im_batch.to(self.device)
            features = self.net(im_batch)
        return features.cpu().numpy()


if __name__ == '__main__':
    img = cv2.imread("demo.jpg")[:, :, (2, 1, 0)]  # BGR to RGB
    extr = Extractor("checkpoint/ckpt.t7")
    feature = extr([img])  # __call__ expects a list of crops
    print(feature.shape)
@@ -0,0 +1,104 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    def __init__(self, c_in, c_out, is_downsample=False):
        super(BasicBlock, self).__init__()
        self.is_downsample = is_downsample
        if is_downsample:
            self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False)
        else:
            self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(c_out)
        self.relu = nn.ReLU(True)
        self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(c_out)
        if is_downsample:
            self.downsample = nn.Sequential(
                nn.Conv2d(c_in, c_out, 1, stride=2, bias=False),
                nn.BatchNorm2d(c_out)
            )
        elif c_in != c_out:
            self.downsample = nn.Sequential(
                nn.Conv2d(c_in, c_out, 1, stride=1, bias=False),
                nn.BatchNorm2d(c_out)
            )
            self.is_downsample = True

    def forward(self, x):
        y = self.conv1(x)
        y = self.bn1(y)
        y = self.relu(y)
        y = self.conv2(y)
        y = self.bn2(y)
        if self.is_downsample:
            x = self.downsample(x)
        return F.relu(x.add(y), True)


def make_layers(c_in, c_out, repeat_times, is_downsample=False):
    blocks = []
    for i in range(repeat_times):
        if i == 0:
            blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample)]
        else:
            blocks += [BasicBlock(c_out, c_out)]
    return nn.Sequential(*blocks)


class Net(nn.Module):
    def __init__(self, num_classes=751, reid=False):
        super(Net, self).__init__()
        # input: 3 x 128 x 64
        self.conv = nn.Sequential(
            nn.Conv2d(3, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            # nn.Conv2d(32,32,3,stride=1,padding=1),
            # nn.BatchNorm2d(32),
            # nn.ReLU(inplace=True),
            nn.MaxPool2d(3, 2, padding=1),
        )
        # 64 x 64 x 32
        self.layer1 = make_layers(64, 64, 2, False)
        # 64 x 64 x 32
        self.layer2 = make_layers(64, 128, 2, True)
        # 128 x 32 x 16
        self.layer3 = make_layers(128, 256, 2, True)
        # 256 x 16 x 8
        self.layer4 = make_layers(256, 512, 2, True)
        # 512 x 8 x 4
        self.avgpool = nn.AvgPool2d((8, 4), 1)
        # 512 x 1 x 1
        self.reid = reid
        self.classifier = nn.Sequential(
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        x = self.conv(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        # B x 512
        if self.reid:
            x = x.div(x.norm(p=2, dim=1, keepdim=True))
            return x
        # classifier
        x = self.classifier(x)
        return x


if __name__ == '__main__':
    net = Net()
    x = torch.randn(4, 3, 128, 64)
    y = net(x)
    import ipdb; ipdb.set_trace()
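A quick property check (a sketch, assuming the `Net` above): with `reid=True` the embedding is L2-normalized, so the cosine similarity used later by the tracker's metric reduces to a plain dot product.

net = Net(reid=True).eval()
with torch.no_grad():
    emb = net(torch.randn(2, 3, 128, 64))
print(emb.norm(dim=1))         # both ~1.0 after the normalization in forward()
print(float(emb[0] @ emb[1]))  # cosine similarity between the two crops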
@@ -0,0 +1,106 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    def __init__(self, c_in, c_out, is_downsample=False):
        super(BasicBlock, self).__init__()
        self.is_downsample = is_downsample
        if is_downsample:
            self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False)
        else:
            self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(c_out)
        self.relu = nn.ReLU(True)
        self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(c_out)
        if is_downsample:
            self.downsample = nn.Sequential(
                nn.Conv2d(c_in, c_out, 1, stride=2, bias=False),
                nn.BatchNorm2d(c_out)
            )
        elif c_in != c_out:
            self.downsample = nn.Sequential(
                nn.Conv2d(c_in, c_out, 1, stride=1, bias=False),
                nn.BatchNorm2d(c_out)
            )
            self.is_downsample = True

    def forward(self, x):
        y = self.conv1(x)
        y = self.bn1(y)
        y = self.relu(y)
        y = self.conv2(y)
        y = self.bn2(y)
        if self.is_downsample:
            x = self.downsample(x)
        return F.relu(x.add(y), True)


def make_layers(c_in, c_out, repeat_times, is_downsample=False):
    blocks = []
    for i in range(repeat_times):
        if i == 0:
            blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample)]
        else:
            blocks += [BasicBlock(c_out, c_out)]
    return nn.Sequential(*blocks)


class Net(nn.Module):
    def __init__(self, num_classes=625, reid=False):
        super(Net, self).__init__()
        # input: 3 x 128 x 64
        self.conv = nn.Sequential(
            nn.Conv2d(3, 32, 3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ELU(inplace=True),
            nn.Conv2d(32, 32, 3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ELU(inplace=True),
            nn.MaxPool2d(3, 2, padding=1),
        )
        # 32 x 64 x 32
        self.layer1 = make_layers(32, 32, 2, False)
        # 32 x 64 x 32
        self.layer2 = make_layers(32, 64, 2, True)
        # 64 x 32 x 16
        self.layer3 = make_layers(64, 128, 2, True)
        # 128 x 16 x 8
        self.dense = nn.Sequential(
            nn.Dropout(p=0.6),
            nn.Linear(128 * 16 * 8, 128),
            nn.BatchNorm1d(128),
            nn.ELU(inplace=True)
        )
        # B x 128
        self.reid = reid
        self.batch_norm = nn.BatchNorm1d(128)
        self.classifier = nn.Sequential(
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        x = self.conv(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = x.view(x.size(0), -1)
        if self.reid:
            x = self.dense[0](x)
            x = self.dense[1](x)
            x = x.div(x.norm(p=2, dim=1, keepdim=True))
            return x
        x = self.dense(x)
        # B x 128
        # classifier
        x = self.classifier(x)
        return x


if __name__ == '__main__':
    net = Net(reid=True)
    x = torch.randn(4, 3, 128, 64)
    y = net(x)
    import ipdb; ipdb.set_trace()
@@ -0,0 +1,77 @@
import torch
import torch.backends.cudnn as cudnn
import torchvision

import argparse
import os

from model import Net

parser = argparse.ArgumentParser(description="Train on market1501")
parser.add_argument("--data-dir", default='data', type=str)
parser.add_argument("--no-cuda", action="store_true")
parser.add_argument("--gpu-id", default=0, type=int)
args = parser.parse_args()

# device
device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu"
if torch.cuda.is_available() and not args.no_cuda:
    cudnn.benchmark = True

# data loader
root = args.data_dir
query_dir = os.path.join(root, "query")
gallery_dir = os.path.join(root, "gallery")
transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize((128, 64)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
queryloader = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(query_dir, transform=transform),
    batch_size=64, shuffle=False
)
galleryloader = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(gallery_dir, transform=transform),
    batch_size=64, shuffle=False
)

# net definition
net = Net(reid=True)
assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!"
print('Loading from checkpoint/ckpt.t7')
checkpoint = torch.load("./checkpoint/ckpt.t7")
net_dict = checkpoint['net_dict']
net.load_state_dict(net_dict, strict=False)
net.eval()
net.to(device)

# compute features
query_features = torch.tensor([]).float()
query_labels = torch.tensor([]).long()
gallery_features = torch.tensor([]).float()
gallery_labels = torch.tensor([]).long()

with torch.no_grad():
    for idx, (inputs, labels) in enumerate(queryloader):
        inputs = inputs.to(device)
        features = net(inputs).cpu()
        query_features = torch.cat((query_features, features), dim=0)
        query_labels = torch.cat((query_labels, labels))

    for idx, (inputs, labels) in enumerate(galleryloader):
        inputs = inputs.to(device)
        features = net(inputs).cpu()
        gallery_features = torch.cat((gallery_features, features), dim=0)
        gallery_labels = torch.cat((gallery_labels, labels))

gallery_labels -= 2

# save features
features = {
    "qf": query_features,
    "ql": query_labels,
    "gf": gallery_features,
    "gl": gallery_labels
}
torch.save(features, "features.pth")
Binary file not shown.
[image file added: 59 KiB]
@@ -0,0 +1,189 @@
import argparse
import os
import time

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.backends.cudnn as cudnn
import torchvision

from model import Net

parser = argparse.ArgumentParser(description="Train on market1501")
parser.add_argument("--data-dir", default='data', type=str)
parser.add_argument("--no-cuda", action="store_true")
parser.add_argument("--gpu-id", default=0, type=int)
parser.add_argument("--lr", default=0.1, type=float)
parser.add_argument("--interval", '-i', default=20, type=int)
parser.add_argument('--resume', '-r', action='store_true')
args = parser.parse_args()

# device
device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu"
if torch.cuda.is_available() and not args.no_cuda:
    cudnn.benchmark = True

# data loading
root = args.data_dir
train_dir = os.path.join(root, "train")
test_dir = os.path.join(root, "test")
transform_train = torchvision.transforms.Compose([
    torchvision.transforms.RandomCrop((128, 64), padding=4),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
transform_test = torchvision.transforms.Compose([
    torchvision.transforms.Resize((128, 64)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
trainloader = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(train_dir, transform=transform_train),
    batch_size=64, shuffle=True
)
testloader = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder(test_dir, transform=transform_test),
    batch_size=64, shuffle=True
)
num_classes = max(len(trainloader.dataset.classes), len(testloader.dataset.classes))

# net definition
start_epoch = 0
best_acc = 0.  # default; replaced by checkpoint['acc'] when resuming
net = Net(num_classes=num_classes)
if args.resume:
    assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!"
    print('Loading from checkpoint/ckpt.t7')
    checkpoint = torch.load("./checkpoint/ckpt.t7")
    # import ipdb; ipdb.set_trace()
    net_dict = checkpoint['net_dict']
    net.load_state_dict(net_dict)
    best_acc = checkpoint['acc']
    start_epoch = checkpoint['epoch']
net.to(device)

# loss and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), args.lr, momentum=0.9, weight_decay=5e-4)

# train function for each epoch
def train(epoch):
    print("\nEpoch : %d" % (epoch + 1))
    net.train()
    training_loss = 0.
    train_loss = 0.
    correct = 0
    total = 0
    interval = args.interval
    start = time.time()
    for idx, (inputs, labels) in enumerate(trainloader):
        # forward
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # accumulating
        training_loss += loss.item()
        train_loss += loss.item()
        correct += outputs.max(dim=1)[1].eq(labels).sum().item()
        total += labels.size(0)

        # print
        if (idx + 1) % interval == 0:
            end = time.time()
            print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format(
                100. * (idx + 1) / len(trainloader), end - start, training_loss / interval, correct, total, 100. * correct / total
            ))
            training_loss = 0.
            start = time.time()

    return train_loss / len(trainloader), 1. - correct / total

def test(epoch):
    global best_acc
    net.eval()
    test_loss = 0.
    correct = 0
    total = 0
    start = time.time()
    with torch.no_grad():
        for idx, (inputs, labels) in enumerate(testloader):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, labels)

            test_loss += loss.item()
            correct += outputs.max(dim=1)[1].eq(labels).sum().item()
            total += labels.size(0)

        print("Testing ...")
        end = time.time()
        print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format(
            100. * (idx + 1) / len(testloader), end - start, test_loss / len(testloader), correct, total, 100. * correct / total
        ))

    # saving checkpoint
    acc = 100. * correct / total
    if acc > best_acc:
        best_acc = acc
        print("Saving parameters to checkpoint/ckpt.t7")
        checkpoint = {
            'net_dict': net.state_dict(),
            'acc': acc,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(checkpoint, './checkpoint/ckpt.t7')

    return test_loss / len(testloader), 1. - correct / total

# plot figure
x_epoch = []
record = {'train_loss': [], 'train_err': [], 'test_loss': [], 'test_err': []}
fig = plt.figure()
ax0 = fig.add_subplot(121, title="loss")
ax1 = fig.add_subplot(122, title="top1err")
def draw_curve(epoch, train_loss, train_err, test_loss, test_err):
    global record
    record['train_loss'].append(train_loss)
    record['train_err'].append(train_err)
    record['test_loss'].append(test_loss)
    record['test_err'].append(test_err)

    x_epoch.append(epoch)
    ax0.plot(x_epoch, record['train_loss'], 'bo-', label='train')
    ax0.plot(x_epoch, record['test_loss'], 'ro-', label='val')
    ax1.plot(x_epoch, record['train_err'], 'bo-', label='train')
    ax1.plot(x_epoch, record['test_err'], 'ro-', label='val')
    if epoch == 0:
        ax0.legend()
        ax1.legend()
    fig.savefig("train.jpg")

# lr decay
def lr_decay():
    global optimizer
    for params in optimizer.param_groups:
        params['lr'] *= 0.1
        lr = params['lr']
        print("Learning rate adjusted to {}".format(lr))

def main():
    for epoch in range(start_epoch, start_epoch + 40):
        train_loss, train_err = train(epoch)
        test_loss, test_err = test(epoch)
        draw_curve(epoch, train_loss, train_err, test_loss, test_err)
        if (epoch + 1) % 20 == 0:
            lr_decay()


if __name__ == '__main__':
    main()
@@ -0,0 +1,115 @@
import numpy as np
import torch

from .deep.feature_extractor import Extractor
from .sort.nn_matching import NearestNeighborDistanceMetric
from .sort.preprocessing import non_max_suppression
from .sort.detection import Detection
from .sort.tracker import Tracker


__all__ = ['DeepSort']


class DeepSort(object):
    def __init__(self, model_path, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7, max_age=70, n_init=3, nn_budget=100, use_cuda=True):
        self.min_confidence = min_confidence
        self.nms_max_overlap = nms_max_overlap

        self.extractor = Extractor(model_path, use_cuda=use_cuda)

        max_cosine_distance = max_dist
        metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
        self.tracker = Tracker(metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init)

    def update(self, bbox_xywh, confidences, ori_img):
        self.height, self.width = ori_img.shape[:2]
        # generate detections
        features = self._get_features(bbox_xywh, ori_img)
        bbox_tlwh = self._xywh_to_tlwh(bbox_xywh)
        detections = [Detection(bbox_tlwh[i], conf, features[i]) for i, conf in enumerate(confidences) if conf > self.min_confidence]

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = non_max_suppression(boxes, self.nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        self.tracker.predict()
        self.tracker.update(detections)

        # output bbox identities
        outputs = []
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlwh()
            x1, y1, x2, y2 = self._tlwh_to_xyxy(box)
            track_id = track.track_id
            outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=int))
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)
        return outputs

    """
    Convert bbox from xc_yc_w_h to xtl_ytl_w_h
    Thanks JieChen91@github.com for reporting this bug!
    """
    @staticmethod
    def _xywh_to_tlwh(bbox_xywh):
        if isinstance(bbox_xywh, np.ndarray):
            bbox_tlwh = bbox_xywh.copy()
        elif isinstance(bbox_xywh, torch.Tensor):
            bbox_tlwh = bbox_xywh.clone()
        bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2.
        bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2.
        return bbox_tlwh

    def _xywh_to_xyxy(self, bbox_xywh):
        x, y, w, h = bbox_xywh
        x1 = max(int(x - w / 2), 0)
        x2 = min(int(x + w / 2), self.width - 1)
        y1 = max(int(y - h / 2), 0)
        y2 = min(int(y + h / 2), self.height - 1)
        return x1, y1, x2, y2

    def _tlwh_to_xyxy(self, bbox_tlwh):
        """
        Convert bbox from xtl_ytl_w_h to xmin_ymin_xmax_ymax
        Thanks JieChen91@github.com for reporting this bug!
        """
        x, y, w, h = bbox_tlwh
        x1 = max(int(x), 0)
        x2 = min(int(x + w), self.width - 1)
        y1 = max(int(y), 0)
        y2 = min(int(y + h), self.height - 1)
        return x1, y1, x2, y2

    def _xyxy_to_tlwh(self, bbox_xyxy):
        x1, y1, x2, y2 = bbox_xyxy

        t = x1
        l = y1
        w = int(x2 - x1)
        h = int(y2 - y1)
        return t, l, w, h

    def _get_features(self, bbox_xywh, ori_img):
        im_crops = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = self._xywh_to_xyxy(box)
            im = ori_img[y1:y2, x1:x2]
            im_crops.append(im)
        if im_crops:
            features = self.extractor(im_crops)
        else:
            features = np.array([])
        return features
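Putting the pieces together, a hedged per-frame sketch (Detector and build_tracker are defined elsewhere in this commit and `cfg` is the config object from the earlier sketch; the xyxy-to-center conversion and the frame source are illustrative assumptions, not the repo's actual demo script):

import cv2
import numpy as np

detector = Detector()                    # from AIDetector_pytorch above
deepsort = build_tracker(cfg, use_cuda=True)

frame = cv2.imread('frame.jpg')          # hypothetical frame
_, boxes = detector.detect(frame)
if boxes:
    # convert detector xyxy boxes to the center-based xywh that update() expects
    xywh = np.array([[(x1 + x2) / 2., (y1 + y2) / 2., x2 - x1, y2 - y1]
                     for x1, y1, x2, y2, lbl, conf in boxes])
    confs = np.array([float(conf) for *_, conf in boxes])
    for x1, y1, x2, y2, track_id in deepsort.update(xywh, confs, frame):
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)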
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,49 @@
# vim: expandtab:ts=4:sw=4
import numpy as np


class Detection(object):
    """
    This class represents a bounding box detection in a single image.

    Parameters
    ----------
    tlwh : array_like
        Bounding box in format `(x, y, w, h)`.
    confidence : float
        Detector confidence score.
    feature : array_like
        A feature vector that describes the object contained in this image.

    Attributes
    ----------
    tlwh : ndarray
        Bounding box in format `(top left x, top left y, width, height)`.
    confidence : float
        Detector confidence score.
    feature : ndarray | NoneType
        A feature vector that describes the object contained in this image.

    """

    def __init__(self, tlwh, confidence, feature):
        self.tlwh = np.asarray(tlwh, dtype=float)
        self.confidence = float(confidence)
        self.feature = np.asarray(feature, dtype=np.float32)

    def to_tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    def to_xyah(self):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
        """
        ret = self.tlwh.copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret
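A worked conversion example (assumes the `Detection` class above; the 512-dim zero feature is a stand-in for a real ReID embedding):

det = Detection([10, 20, 50, 100], 0.9, np.zeros(512, dtype=np.float32))
print(det.to_tlbr())  # [ 10.  20.  60. 120.]: bottom-right = top-left + (w, h)
print(det.to_xyah())  # [ 35.  70.   0.5 100.]: center x/y, aspect w/h, height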
@@ -0,0 +1,81 @@
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from . import linear_assignment


def iou(bbox, candidates):
    """Compute intersection over union.

    Parameters
    ----------
    bbox : ndarray
        A bounding box in format `(top left x, top left y, width, height)`.
    candidates : ndarray
        A matrix of candidate bounding boxes (one per row) in the same format
        as `bbox`.

    Returns
    -------
    ndarray
        The intersection over union in [0, 1] between the `bbox` and each
        candidate. A higher score means a larger fraction of the `bbox` is
        occluded by the candidate.

    """
    bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
    candidates_tl = candidates[:, :2]
    candidates_br = candidates[:, :2] + candidates[:, 2:]

    tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
               np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
    br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
               np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
    wh = np.maximum(0., br - tl)

    area_intersection = wh.prod(axis=1)
    area_bbox = bbox[2:].prod()
    area_candidates = candidates[:, 2:].prod(axis=1)
    return area_intersection / (area_bbox + area_candidates - area_intersection)


def iou_cost(tracks, detections, track_indices=None,
             detection_indices=None):
    """An intersection over union distance metric.

    Parameters
    ----------
    tracks : List[deep_sort.track.Track]
        A list of tracks.
    detections : List[deep_sort.detection.Detection]
        A list of detections.
    track_indices : Optional[List[int]]
        A list of indices to tracks that should be matched. Defaults to
        all `tracks`.
    detection_indices : Optional[List[int]]
        A list of indices to detections that should be matched. Defaults
        to all `detections`.

    Returns
    -------
    ndarray
        Returns a cost matrix of shape
        len(track_indices), len(detection_indices) where entry (i, j) is
        `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.

    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
    for row, track_idx in enumerate(track_indices):
        if tracks[track_idx].time_since_update > 1:
            cost_matrix[row, :] = linear_assignment.INFTY_COST
            continue

        bbox = tracks[track_idx].to_tlwh()
        candidates = np.asarray([detections[i].tlwh for i in detection_indices])
        cost_matrix[row, :] = 1. - iou(bbox, candidates)
    return cost_matrix
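A small numeric check of `iou` above (values chosen so the arithmetic is easy to verify by hand):

import numpy as np

bbox = np.array([0., 0., 10., 10.])
candidates = np.array([[0., 0., 10., 10.],
                       [5., 5., 10., 10.]])
print(iou(bbox, candidates))  # [1.0, 0.1428...] = [100/100, 25/(100+100-25)]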
@@ -0,0 +1,229 @@
# vim: expandtab:ts=4:sw=4
import numpy as np
import scipy.linalg


"""
Table for the 0.95 quantile of the chi-square distribution with N degrees of
freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
function and used as Mahalanobis gating threshold.
"""
chi2inv95 = {
    1: 3.8415,
    2: 5.9915,
    3: 7.8147,
    4: 9.4877,
    5: 11.070,
    6: 12.592,
    7: 14.067,
    8: 15.507,
    9: 16.919}


class KalmanFilter(object):
    """
    A simple Kalman filter for tracking bounding boxes in image space.

    The 8-dimensional state space

        x, y, a, h, vx, vy, va, vh

    contains the bounding box center position (x, y), aspect ratio a, height h,
    and their respective velocities.

    Object motion follows a constant velocity model. The bounding box location
    (x, y, a, h) is taken as direct observation of the state space (linear
    observation model).

    """

    def __init__(self):
        ndim, dt = 4, 1.

        # Create Kalman filter model matrices.
        self._motion_mat = np.eye(2 * ndim, 2 * ndim)
        for i in range(ndim):
            self._motion_mat[i, ndim + i] = dt
        self._update_mat = np.eye(ndim, 2 * ndim)

        # Motion and observation uncertainty are chosen relative to the current
        # state estimate. These weights control the amount of uncertainty in
        # the model. This is a bit hacky.
        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160

    def initiate(self, measurement):
        """Create track from unassociated measurement.

        Parameters
        ----------
        measurement : ndarray
            Bounding box coordinates (x, y, a, h) with center position (x, y),
            aspect ratio a, and height h.

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector (8 dimensional) and covariance matrix (8x8
            dimensional) of the new track. Unobserved velocities are initialized
            to 0 mean.

        """
        mean_pos = measurement
        mean_vel = np.zeros_like(mean_pos)
        mean = np.r_[mean_pos, mean_vel]

        std = [
            2 * self._std_weight_position * measurement[3],
            2 * self._std_weight_position * measurement[3],
            1e-2,
            2 * self._std_weight_position * measurement[3],
            10 * self._std_weight_velocity * measurement[3],
            10 * self._std_weight_velocity * measurement[3],
            1e-5,
            10 * self._std_weight_velocity * measurement[3]]
        covariance = np.diag(np.square(std))
        return mean, covariance

    def predict(self, mean, covariance):
        """Run Kalman filter prediction step.

        Parameters
        ----------
        mean : ndarray
            The 8 dimensional mean vector of the object state at the previous
            time step.
        covariance : ndarray
            The 8x8 dimensional covariance matrix of the object state at the
            previous time step.

        Returns
        -------
        (ndarray, ndarray)
            Returns the mean vector and covariance matrix of the predicted
            state. Unobserved velocities are initialized to 0 mean.

        """
        std_pos = [
            self._std_weight_position * mean[3],
            self._std_weight_position * mean[3],
            1e-2,
            self._std_weight_position * mean[3]]
        std_vel = [
            self._std_weight_velocity * mean[3],
            self._std_weight_velocity * mean[3],
            1e-5,
            self._std_weight_velocity * mean[3]]
        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))

        mean = np.dot(self._motion_mat, mean)
        covariance = np.linalg.multi_dot((
            self._motion_mat, covariance, self._motion_mat.T)) + motion_cov

        return mean, covariance

    def project(self, mean, covariance):
        """Project state distribution to measurement space.

        Parameters
        ----------
        mean : ndarray
            The state's mean vector (8 dimensional array).
        covariance : ndarray
            The state's covariance matrix (8x8 dimensional).

        Returns
        -------
        (ndarray, ndarray)
            Returns the projected mean and covariance matrix of the given state
            estimate.

        """
        std = [
            self._std_weight_position * mean[3],
            self._std_weight_position * mean[3],
            1e-1,
            self._std_weight_position * mean[3]]
        innovation_cov = np.diag(np.square(std))

        mean = np.dot(self._update_mat, mean)
        covariance = np.linalg.multi_dot((
            self._update_mat, covariance, self._update_mat.T))
        return mean, covariance + innovation_cov

    def update(self, mean, covariance, measurement):
        """Run Kalman filter correction step.

        Parameters
        ----------
        mean : ndarray
            The predicted state's mean vector (8 dimensional).
        covariance : ndarray
            The state's covariance matrix (8x8 dimensional).
        measurement : ndarray
            The 4 dimensional measurement vector (x, y, a, h), where (x, y)
            is the center position, a the aspect ratio, and h the height of the
            bounding box.

        Returns
        -------
        (ndarray, ndarray)
            Returns the measurement-corrected state distribution.

        """
        projected_mean, projected_cov = self.project(mean, covariance)

        chol_factor, lower = scipy.linalg.cho_factor(
            projected_cov, lower=True, check_finite=False)
        kalman_gain = scipy.linalg.cho_solve(
            (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
            check_finite=False).T
        innovation = measurement - projected_mean

        new_mean = mean + np.dot(innovation, kalman_gain.T)
        new_covariance = covariance - np.linalg.multi_dot((
            kalman_gain, projected_cov, kalman_gain.T))
        return new_mean, new_covariance

    def gating_distance(self, mean, covariance, measurements,
                        only_position=False):
        """Compute gating distance between state distribution and measurements.

        A suitable distance threshold can be obtained from `chi2inv95`. If
        `only_position` is False, the chi-square distribution has 4 degrees of
        freedom, otherwise 2.

        Parameters
        ----------
        mean : ndarray
            Mean vector over the state distribution (8 dimensional).
        covariance : ndarray
            Covariance of the state distribution (8x8 dimensional).
        measurements : ndarray
            An Nx4 dimensional matrix of N measurements, each in
            format (x, y, a, h) where (x, y) is the bounding box center
            position, a the aspect ratio, and h the height.
        only_position : Optional[bool]
            If True, distance computation is done with respect to the bounding
            box center position only.

        Returns
        -------
        ndarray
            Returns an array of length N, where the i-th element contains the
            squared Mahalanobis distance between (mean, covariance) and
            `measurements[i]`.

        """
        mean, covariance = self.project(mean, covariance)
        if only_position:
            mean, covariance = mean[:2], covariance[:2, :2]
            measurements = measurements[:, :2]

        cholesky_factor = np.linalg.cholesky(covariance)
        d = measurements - mean
        z = scipy.linalg.solve_triangular(
            cholesky_factor, d.T, lower=True, check_finite=False,
            overwrite_b=True)
        squared_maha = np.sum(z * z, axis=0)
        return squared_maha
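A minimal predict/update round trip (a sketch, assuming the `KalmanFilter` above; the numbers are illustrative):

import numpy as np

kf = KalmanFilter()
mean, cov = kf.initiate(np.array([320., 240., 0.5, 100.]))  # (x, y, a, h)
mean, cov = kf.predict(mean, cov)                 # constant-velocity prior
mean, cov = kf.update(mean, cov, np.array([324., 242., 0.5, 102.]))
print(mean[:4])  # corrected (x, y, a, h), pulled toward the measurement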
@@ -0,0 +1,192 @@
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
# from sklearn.utils.linear_assignment_ import linear_assignment
from scipy.optimize import linear_sum_assignment as linear_assignment
from . import kalman_filter


INFTY_COST = 1e+5


def min_cost_matching(
        distance_metric, max_distance, tracks, detections, track_indices=None,
        detection_indices=None):
    """Solve linear assignment problem.

    Parameters
    ----------
    distance_metric : Callable[List[Track], List[Detection], List[int], List[int]] -> ndarray
        The distance metric is given a list of tracks and detections as well as
        a list of N track indices and M detection indices. The metric should
        return the NxM dimensional cost matrix, where element (i, j) is the
        association cost between the i-th track in the given track indices and
        the j-th detection in the given detection_indices.
    max_distance : float
        Gating threshold. Associations with cost larger than this value are
        disregarded.
    tracks : List[track.Track]
        A list of predicted tracks at the current time step.
    detections : List[detection.Detection]
        A list of detections at the current time step.
    track_indices : List[int]
        List of track indices that maps rows in `cost_matrix` to tracks in
        `tracks` (see description above).
    detection_indices : List[int]
        List of detection indices that maps columns in `cost_matrix` to
        detections in `detections` (see description above).

    Returns
    -------
    (List[(int, int)], List[int], List[int])
        Returns a tuple with the following three entries:
        * A list of matched track and detection indices.
        * A list of unmatched track indices.
        * A list of unmatched detection indices.

    """
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))

    if len(detection_indices) == 0 or len(track_indices) == 0:
        return [], track_indices, detection_indices  # Nothing to match.

    cost_matrix = distance_metric(
        tracks, detections, track_indices, detection_indices)
    cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5

    row_indices, col_indices = linear_assignment(cost_matrix)

    matches, unmatched_tracks, unmatched_detections = [], [], []
    for col, detection_idx in enumerate(detection_indices):
        if col not in col_indices:
            unmatched_detections.append(detection_idx)
    for row, track_idx in enumerate(track_indices):
        if row not in row_indices:
            unmatched_tracks.append(track_idx)
    for row, col in zip(row_indices, col_indices):
        track_idx = track_indices[row]
        detection_idx = detection_indices[col]
        if cost_matrix[row, col] > max_distance:
            unmatched_tracks.append(track_idx)
            unmatched_detections.append(detection_idx)
        else:
            matches.append((track_idx, detection_idx))
    return matches, unmatched_tracks, unmatched_detections


def matching_cascade(
        distance_metric, max_distance, cascade_depth, tracks, detections,
        track_indices=None, detection_indices=None):
    """Run matching cascade.

    Parameters
    ----------
    distance_metric : Callable[List[Track], List[Detection], List[int], List[int]] -> ndarray
        The distance metric is given a list of tracks and detections as well as
        a list of N track indices and M detection indices. The metric should
        return the NxM dimensional cost matrix, where element (i, j) is the
        association cost between the i-th track in the given track indices and
        the j-th detection in the given detection indices.
    max_distance : float
        Gating threshold. Associations with cost larger than this value are
        disregarded.
    cascade_depth: int
        The cascade depth, should be set to the maximum track age.
    tracks : List[track.Track]
        A list of predicted tracks at the current time step.
    detections : List[detection.Detection]
        A list of detections at the current time step.
    track_indices : Optional[List[int]]
        List of track indices that maps rows in `cost_matrix` to tracks in
        `tracks` (see description above). Defaults to all tracks.
    detection_indices : Optional[List[int]]
        List of detection indices that maps columns in `cost_matrix` to
        detections in `detections` (see description above). Defaults to all
        detections.

    Returns
    -------
    (List[(int, int)], List[int], List[int])
        Returns a tuple with the following three entries:
        * A list of matched track and detection indices.
        * A list of unmatched track indices.
        * A list of unmatched detection indices.

    """
    if track_indices is None:
        track_indices = list(range(len(tracks)))
    if detection_indices is None:
        detection_indices = list(range(len(detections)))

    unmatched_detections = detection_indices
    matches = []
    for level in range(cascade_depth):
        if len(unmatched_detections) == 0:  # No detections left
            break

        track_indices_l = [
            k for k in track_indices
            if tracks[k].time_since_update == 1 + level
        ]
        if len(track_indices_l) == 0:  # Nothing to match at this level
            continue

        matches_l, _, unmatched_detections = \
            min_cost_matching(
                distance_metric, max_distance, tracks, detections,
                track_indices_l, unmatched_detections)
        matches += matches_l
    unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
    return matches, unmatched_tracks, unmatched_detections


def gate_cost_matrix(
        kf, cost_matrix, tracks, detections, track_indices, detection_indices,
        gated_cost=INFTY_COST, only_position=False):
    """Invalidate infeasible entries in cost matrix based on the state
    distributions obtained by Kalman filtering.

    Parameters
    ----------
    kf : The Kalman filter.
    cost_matrix : ndarray
        The NxM dimensional cost matrix, where N is the number of track indices
        and M is the number of detection indices, such that entry (i, j) is the
        association cost between `tracks[track_indices[i]]` and
        `detections[detection_indices[j]]`.
    tracks : List[track.Track]
        A list of predicted tracks at the current time step.
    detections : List[detection.Detection]
        A list of detections at the current time step.
    track_indices : List[int]
        List of track indices that maps rows in `cost_matrix` to tracks in
        `tracks` (see description above).
    detection_indices : List[int]
        List of detection indices that maps columns in `cost_matrix` to
        detections in `detections` (see description above).
    gated_cost : Optional[float]
        Entries in the cost matrix corresponding to infeasible associations are
        set to this value. Defaults to a very large value.
    only_position : Optional[bool]
        If True, only the x, y position of the state distribution is considered
        during gating. Defaults to False.

    Returns
    -------
    ndarray
        Returns the modified cost matrix.

    """
    gating_dim = 2 if only_position else 4
    gating_threshold = kalman_filter.chi2inv95[gating_dim]
    measurements = np.asarray(
        [detections[i].to_xyah() for i in detection_indices])
    for row, track_idx in enumerate(track_indices):
        track = tracks[track_idx]
        gating_distance = kf.gating_distance(
            track.mean, track.covariance, measurements, only_position)
        cost_matrix[row, gating_distance > gating_threshold] = gated_cost
    return cost_matrix
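A toy run of `min_cost_matching` with a precomputed cost matrix (the dummy tracks/detections and the metric closure are assumptions for standalone illustration; in the tracker the metric computes costs from real Track and Detection objects):

import numpy as np

cost = np.array([[0.1, 0.9],
                 [0.8, 0.2],
                 [0.9, 0.9]])
metric = lambda tracks, dets, ti, di: cost[np.ix_(ti, di)]
matches, um_tracks, um_dets = min_cost_matching(
    metric, max_distance=0.5, tracks=[None] * 3, detections=[None] * 2)
print(matches, um_tracks, um_dets)  # matches (0, 0) and (1, 1); track 2 unmatched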
@@ -0,0 +1,177 @@
# vim: expandtab:ts=4:sw=4
import numpy as np


def _pdist(a, b):
    """Compute pair-wise squared distance between points in `a` and `b`.

    Parameters
    ----------
    a : array_like
        An NxM matrix of N samples of dimensionality M.
    b : array_like
        An LxM matrix of L samples of dimensionality M.

    Returns
    -------
    ndarray
        Returns a matrix of size len(a), len(b) such that element (i, j)
        contains the squared distance between `a[i]` and `b[j]`.

    """
    a, b = np.asarray(a), np.asarray(b)
    if len(a) == 0 or len(b) == 0:
        return np.zeros((len(a), len(b)))
    a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)
    r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]
    r2 = np.clip(r2, 0., float(np.inf))
    return r2


def _cosine_distance(a, b, data_is_normalized=False):
    """Compute pair-wise cosine distance between points in `a` and `b`.

    Parameters
    ----------
    a : array_like
        An NxM matrix of N samples of dimensionality M.
    b : array_like
        An LxM matrix of L samples of dimensionality M.
    data_is_normalized : Optional[bool]
        If True, assumes rows in a and b are unit length vectors.
        Otherwise, a and b are explicitly normalized to length 1.

    Returns
    -------
    ndarray
        Returns a matrix of size len(a), len(b) such that element (i, j)
        contains the cosine distance between `a[i]` and `b[j]`.

    """
    if not data_is_normalized:
        a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
        b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
    return 1. - np.dot(a, b.T)


def _nn_euclidean_distance(x, y):
    """ Helper function for nearest neighbor distance metric (Euclidean).

    Parameters
    ----------
    x : ndarray
        A matrix of N row-vectors (sample points).
    y : ndarray
        A matrix of M row-vectors (query points).

    Returns
    -------
    ndarray
        A vector of length M that contains for each entry in `y` the
        smallest Euclidean distance to a sample in `x`.

    """
    distances = _pdist(x, y)
    return np.maximum(0.0, distances.min(axis=0))


def _nn_cosine_distance(x, y):
    """ Helper function for nearest neighbor distance metric (cosine).

    Parameters
    ----------
    x : ndarray
        A matrix of N row-vectors (sample points).
    y : ndarray
        A matrix of M row-vectors (query points).

    Returns
    -------
    ndarray
        A vector of length M that contains for each entry in `y` the
        smallest cosine distance to a sample in `x`.

    """
    distances = _cosine_distance(x, y)
    return distances.min(axis=0)


class NearestNeighborDistanceMetric(object):
    """
    A nearest neighbor distance metric that, for each target, returns
    the closest distance to any sample that has been observed so far.

    Parameters
    ----------
    metric : str
        Either "euclidean" or "cosine".
    matching_threshold: float
        The matching threshold. Samples with larger distance are considered an
        invalid match.
    budget : Optional[int]
        If not None, fix samples per class to at most this number. Removes
        the oldest samples when the budget is reached.

    Attributes
    ----------
    samples : Dict[int -> List[ndarray]]
        A dictionary that maps from target identities to the list of samples
        that have been observed so far.

    """

    def __init__(self, metric, matching_threshold, budget=None):
        if metric == "euclidean":
            self._metric = _nn_euclidean_distance
        elif metric == "cosine":
            self._metric = _nn_cosine_distance
        else:
            raise ValueError(
                "Invalid metric; must be either 'euclidean' or 'cosine'")
        self.matching_threshold = matching_threshold
        self.budget = budget
        self.samples = {}

    def partial_fit(self, features, targets, active_targets):
        """Update the distance metric with new data.

        Parameters
        ----------
        features : ndarray
            An NxM matrix of N features of dimensionality M.
        targets : ndarray
            An integer array of associated target identities.
        active_targets : List[int]
            A list of targets that are currently present in the scene.

        """
        for feature, target in zip(features, targets):
            self.samples.setdefault(target, []).append(feature)
            if self.budget is not None:
                self.samples[target] = self.samples[target][-self.budget:]
        self.samples = {k: self.samples[k] for k in active_targets}

    def distance(self, features, targets):
        """Compute distance between features and targets.

        Parameters
        ----------
        features : ndarray
            An NxM matrix of N features of dimensionality M.
        targets : List[int]
            A list of targets to match the given `features` against.

        Returns
        -------
        ndarray
            Returns a cost matrix of shape len(targets), len(features), where
            element (i, j) contains the closest squared distance between
            `targets[i]` and `features[j]`.

        """
        cost_matrix = np.zeros((len(targets), len(features)))
        for i, target in enumerate(targets):
            cost_matrix[i, :] = self._metric(self.samples[target], features)
        return cost_matrix
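A short sketch of the metric's fit/query cycle (toy unit features; assumes the class above):

import numpy as np

metric = NearestNeighborDistanceMetric("cosine", matching_threshold=0.2, budget=5)
feats = np.eye(3, dtype=np.float32)                   # three unit feature vectors
metric.partial_fit(feats, targets=np.array([1, 1, 2]), active_targets=[1, 2])
cost = metric.distance(np.eye(2, 3, dtype=np.float32), targets=[1, 2])
print(cost)  # one row per target: [[0., 0.], [1., 1.]]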
@@ -0,0 +1,73 @@
# vim: expandtab:ts=4:sw=4
import numpy as np
import cv2


def non_max_suppression(boxes, max_bbox_overlap, scores=None):
    """Suppress overlapping detections.

    Original code from [1]_ has been adapted to include confidence score.

    .. [1] http://www.pyimagesearch.com/2015/02/16/
           faster-non-maximum-suppression-python/

    Examples
    --------

        >>> boxes = [d.roi for d in detections]
        >>> scores = [d.confidence for d in detections]
        >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)
        >>> detections = [detections[i] for i in indices]

    Parameters
    ----------
    boxes : ndarray
        Array of ROIs (x, y, width, height).
    max_bbox_overlap : float
        ROIs that overlap more than this value are suppressed.
    scores : Optional[array_like]
        Detector confidence score.

    Returns
    -------
    List[int]
        Returns indices of detections that have survived non-maxima suppression.

    """
    if len(boxes) == 0:
        return []

    boxes = boxes.astype(float)
    pick = []

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2] + boxes[:, 0]
    y2 = boxes[:, 3] + boxes[:, 1]

    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    if scores is not None:
        idxs = np.argsort(scores)
    else:
        idxs = np.argsort(y2)

    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])

        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        overlap = (w * h) / area[idxs[:last]]

        idxs = np.delete(
            idxs, np.concatenate(
                ([last], np.where(overlap > max_bbox_overlap)[0])))

    return pick
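A hand-checkable run of the NMS above (values chosen so the overlap arithmetic is easy to verify):

import numpy as np

boxes = np.array([[0, 0, 10, 10],     # (x, y, w, h)
                  [1, 1, 10, 10],     # overlaps box 0 heavily
                  [20, 20, 5, 5]], dtype=float)
scores = np.array([0.9, 0.8, 0.7])
print(non_max_suppression(boxes, 0.5, scores))  # [0, 2]: box 1 is suppressed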
@ -0,0 +1,166 @@
# vim: expandtab:ts=4:sw=4


class TrackState:
    """
    Enumeration type for the single target track state. Newly created tracks are
    classified as `tentative` until enough evidence has been collected. Then,
    the track state is changed to `confirmed`. Tracks that are no longer alive
    are classified as `deleted` to mark them for removal from the set of active
    tracks.

    """

    Tentative = 1
    Confirmed = 2
    Deleted = 3


class Track:
    """
    A single target track with state space `(x, y, a, h)` and associated
    velocities, where `(x, y)` is the center of the bounding box, `a` is the
    aspect ratio and `h` is the height.

    Parameters
    ----------
    mean : ndarray
        Mean vector of the initial state distribution.
    covariance : ndarray
        Covariance matrix of the initial state distribution.
    track_id : int
        A unique track identifier.
    n_init : int
        Number of consecutive detections before the track is confirmed. The
        track state is set to `Deleted` if a miss occurs within the first
        `n_init` frames.
    max_age : int
        The maximum number of consecutive misses before the track state is
        set to `Deleted`.
    feature : Optional[ndarray]
        Feature vector of the detection this track originates from. If not None,
        this feature is added to the `features` cache.

    Attributes
    ----------
    mean : ndarray
        Mean vector of the initial state distribution.
    covariance : ndarray
        Covariance matrix of the initial state distribution.
    track_id : int
        A unique track identifier.
    hits : int
        Total number of measurement updates.
    age : int
        Total number of frames since first occurrence.
    time_since_update : int
        Total number of frames since last measurement update.
    state : TrackState
        The current track state.
    features : List[ndarray]
        A cache of features. On each measurement update, the associated feature
        vector is added to this list.

    """

    def __init__(self, mean, covariance, track_id, n_init, max_age,
                 feature=None):
        self.mean = mean
        self.covariance = covariance
        self.track_id = track_id
        self.hits = 1
        self.age = 1
        self.time_since_update = 0

        self.state = TrackState.Tentative
        self.features = []
        if feature is not None:
            self.features.append(feature)

        self._n_init = n_init
        self._max_age = max_age

    def to_tlwh(self):
        """Get current position in bounding box format `(top left x, top left y,
        width, height)`.

        Returns
        -------
        ndarray
            The bounding box.

        """
        ret = self.mean[:4].copy()
        ret[2] *= ret[3]
        ret[:2] -= ret[2:] / 2
        return ret

    def to_tlbr(self):
        """Get current position in bounding box format `(min x, min y, max x,
        max y)`.

        Returns
        -------
        ndarray
            The bounding box.

        """
        ret = self.to_tlwh()
        ret[2:] = ret[:2] + ret[2:]
        return ret

    def predict(self, kf):
        """Propagate the state distribution to the current time step using a
        Kalman filter prediction step.

        Parameters
        ----------
        kf : kalman_filter.KalmanFilter
            The Kalman filter.

        """
        self.mean, self.covariance = kf.predict(self.mean, self.covariance)
        self.age += 1
        self.time_since_update += 1

    def update(self, kf, detection):
        """Perform Kalman filter measurement update step and update the feature
        cache.

        Parameters
        ----------
        kf : kalman_filter.KalmanFilter
            The Kalman filter.
        detection : Detection
            The associated detection.

        """
        self.mean, self.covariance = kf.update(
            self.mean, self.covariance, detection.to_xyah())
        self.features.append(detection.feature)

        self.hits += 1
        self.time_since_update = 0
        if self.state == TrackState.Tentative and self.hits >= self._n_init:
            self.state = TrackState.Confirmed

    def mark_missed(self):
        """Mark this track as missed (no association at the current time step).
        """
        if self.state == TrackState.Tentative:
            self.state = TrackState.Deleted
        elif self.time_since_update > self._max_age:
            self.state = TrackState.Deleted

    def is_tentative(self):
        """Returns True if this track is tentative (unconfirmed).
        """
        return self.state == TrackState.Tentative

    def is_confirmed(self):
        """Returns True if this track is confirmed."""
        return self.state == TrackState.Confirmed

    def is_deleted(self):
        """Returns True if this track is dead and should be deleted."""
        return self.state == TrackState.Deleted
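A short sketch of the track lifecycle these methods implement (hypothetical objects; `kf` is a `kalman_filter.KalmanFilter` and `det` a `Detection` from this package):

    kf = kalman_filter.KalmanFilter()
    mean, covariance = kf.initiate(det.to_xyah())
    track = Track(mean, covariance, track_id=1, n_init=3, max_age=70, feature=det.feature)

    track.predict(kf)             # once per frame, before association
    track.update(kf, det)         # on a match; Tentative -> Confirmed after n_init hits
    track.mark_missed()           # on a miss; a Tentative track is Deleted immediately
    x, y, w, h = track.to_tlwh()  # current estimate in top-left/width/height format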
@ -0,0 +1,138 @@
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from . import kalman_filter
from . import linear_assignment
from . import iou_matching
from .track import Track


class Tracker:
    """
    This is the multi-target tracker.

    Parameters
    ----------
    metric : nn_matching.NearestNeighborDistanceMetric
        A distance metric for measurement-to-track association.
    max_age : int
        Maximum number of consecutive misses before a track is deleted.
    n_init : int
        Number of consecutive detections before the track is confirmed. The
        track state is set to `Deleted` if a miss occurs within the first
        `n_init` frames.

    Attributes
    ----------
    metric : nn_matching.NearestNeighborDistanceMetric
        The distance metric used for measurement-to-track association.
    max_age : int
        Maximum number of consecutive misses before a track is deleted.
    n_init : int
        Number of frames that a track remains in the initialization phase.
    kf : kalman_filter.KalmanFilter
        A Kalman filter to filter target trajectories in image space.
    tracks : List[Track]
        The list of active tracks at the current time step.

    """

    def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3):
        self.metric = metric
        self.max_iou_distance = max_iou_distance
        self.max_age = max_age
        self.n_init = n_init

        self.kf = kalman_filter.KalmanFilter()
        self.tracks = []
        self._next_id = 1

    def predict(self):
        """Propagate track state distributions one time step forward.

        This function should be called once every time step, before `update`.
        """
        for track in self.tracks:
            track.predict(self.kf)

    def update(self, detections):
        """Perform measurement update and track management.

        Parameters
        ----------
        detections : List[deep_sort.detection.Detection]
            A list of detections at the current time step.

        """
        # Run matching cascade.
        matches, unmatched_tracks, unmatched_detections = \
            self._match(detections)

        # Update track set.
        for track_idx, detection_idx in matches:
            self.tracks[track_idx].update(
                self.kf, detections[detection_idx])
        for track_idx in unmatched_tracks:
            self.tracks[track_idx].mark_missed()
        for detection_idx in unmatched_detections:
            self._initiate_track(detections[detection_idx])
        self.tracks = [t for t in self.tracks if not t.is_deleted()]

        # Update distance metric.
        active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
        features, targets = [], []
        for track in self.tracks:
            if not track.is_confirmed():
                continue
            features += track.features
            targets += [track.track_id for _ in track.features]
            track.features = []
        self.metric.partial_fit(
            np.asarray(features), np.asarray(targets), active_targets)

    def _match(self, detections):

        def gated_metric(tracks, dets, track_indices, detection_indices):
            features = np.array([dets[i].feature for i in detection_indices])
            targets = np.array([tracks[i].track_id for i in track_indices])
            cost_matrix = self.metric.distance(features, targets)
            cost_matrix = linear_assignment.gate_cost_matrix(
                self.kf, cost_matrix, tracks, dets, track_indices,
                detection_indices)

            return cost_matrix

        # Split track set into confirmed and unconfirmed tracks.
        confirmed_tracks = [
            i for i, t in enumerate(self.tracks) if t.is_confirmed()]
        unconfirmed_tracks = [
            i for i, t in enumerate(self.tracks) if not t.is_confirmed()]

        # Associate confirmed tracks using appearance features.
        matches_a, unmatched_tracks_a, unmatched_detections = \
            linear_assignment.matching_cascade(
                gated_metric, self.metric.matching_threshold, self.max_age,
                self.tracks, detections, confirmed_tracks)

        # Associate remaining tracks together with unconfirmed tracks using IOU.
        iou_track_candidates = unconfirmed_tracks + [
            k for k in unmatched_tracks_a if
            self.tracks[k].time_since_update == 1]
        unmatched_tracks_a = [
            k for k in unmatched_tracks_a if
            self.tracks[k].time_since_update != 1]
        matches_b, unmatched_tracks_b, unmatched_detections = \
            linear_assignment.min_cost_matching(
                iou_matching.iou_cost, self.max_iou_distance, self.tracks,
                detections, iou_track_candidates, unmatched_detections)

        matches = matches_a + matches_b
        unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
        return matches, unmatched_tracks, unmatched_detections

    def _initiate_track(self, detection):
        mean, covariance = self.kf.initiate(detection.to_xyah())
        self.tracks.append(Track(
            mean, covariance, self._next_id, self.n_init, self.max_age,
            detection.feature))
        self._next_id += 1
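The intended per-frame driving loop, as a sketch (`nn_matching` and `Detection` come from this package; the thresholds are hypothetical):

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", matching_threshold=0.2, budget=100)
    tracker = Tracker(metric, max_iou_distance=0.7, max_age=70, n_init=3)
    for detections in detections_per_frame:   # List[Detection] for each frame
        tracker.predict()                     # propagate Kalman states first
        tracker.update(detections)            # associate, update, create, delete
        confirmed = [(t.track_id, t.to_tlwh()) for t in tracker.tracks if t.is_confirmed()]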
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,13 @@
from os import environ


def assert_in(file, files_to_check):
    if file not in files_to_check:
        raise AssertionError("{} does not exist in the list".format(str(file)))
    return True


def assert_in_env(check_list: list):
    for item in check_list:
        assert_in(item, environ.keys())
    return True
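A quick usage sketch (the environment variable name here is made up):

    import os
    os.environ['REID_CKPT'] = '/tmp/ckpt.t7'
    assert_in_env(['REID_CKPT'])   # True; raises AssertionError for a missing variable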
@ -0,0 +1,36 @@
import numpy as np
import cv2

palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)


def compute_color_for_labels(label):
    """
    Simple function that computes a fixed color depending on the class label.
    """
    color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette]
    return tuple(color)


def draw_boxes(img, bbox, identities=None, offset=(0, 0)):
    for i, box in enumerate(bbox):
        x1, y1, x2, y2 = [int(coord) for coord in box]
        x1 += offset[0]
        x2 += offset[0]
        y1 += offset[1]
        y2 += offset[1]
        # box text and bar
        id = int(identities[i]) if identities is not None else 0
        color = compute_color_for_labels(id)
        label = '{}{:d}'.format("", id)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0]
        cv2.rectangle(img, (x1, y1), (x2, y2), color, 3)
        cv2.rectangle(img, (x1, y1), (x1 + t_size[0] + 3, y1 + t_size[1] + 4), color, -1)
        cv2.putText(img, label, (x1, y1 + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2)
    return img


if __name__ == '__main__':
    for i in range(82):
        print(compute_color_for_labels(i))
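Wiring tracker output into `draw_boxes`, as a sketch (the array contents are hypothetical; each row is x1, y1, x2, y2, track id):

    import numpy as np
    import cv2

    frame = np.zeros((480, 640, 3), dtype=np.uint8)
    outputs = np.array([[50, 60, 120, 200, 7]])
    frame = draw_boxes(frame, outputs[:, :4], identities=outputs[:, -1])
    cv2.imwrite('annotated.jpg', frame)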
@ -0,0 +1,103 @@
import os
import numpy as np
import copy
import motmetrics as mm
mm.lap.default_solver = 'lap'
from utils.io import read_results, unzip_objs


class Evaluator(object):

    def __init__(self, data_root, seq_name, data_type):
        self.data_root = data_root
        self.seq_name = seq_name
        self.data_type = data_type

        self.load_annotations()
        self.reset_accumulator()

    def load_annotations(self):
        assert self.data_type == 'mot'

        gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt')
        self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True)
        self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True)

    def reset_accumulator(self):
        self.acc = mm.MOTAccumulator(auto_id=True)

    def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False):
        # results
        trk_tlwhs = np.copy(trk_tlwhs)
        trk_ids = np.copy(trk_ids)

        # gts
        gt_objs = self.gt_frame_dict.get(frame_id, [])
        gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]

        # ignore boxes
        ignore_objs = self.gt_ignore_frame_dict.get(frame_id, [])
        ignore_tlwhs = unzip_objs(ignore_objs)[0]

        # remove ignored results
        keep = np.ones(len(trk_tlwhs), dtype=bool)
        iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5)
        if len(iou_distance) > 0:
            match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
            match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
            match_ious = iou_distance[match_is, match_js]

            match_js = np.asarray(match_js, dtype=int)
            match_js = match_js[np.logical_not(np.isnan(match_ious))]
            keep[match_js] = False
            trk_tlwhs = trk_tlwhs[keep]
            trk_ids = trk_ids[keep]

        # get distance matrix
        iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5)

        # acc
        self.acc.update(gt_ids, trk_ids, iou_distance)

        if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'):
            events = self.acc.last_mot_events  # only supported by https://github.com/longcw/py-motmetrics
        else:
            events = None
        return events

    def eval_file(self, filename):
        self.reset_accumulator()

        result_frame_dict = read_results(filename, self.data_type, is_gt=False)
        frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys())))
        for frame_id in frames:
            trk_objs = result_frame_dict.get(frame_id, [])
            trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2]
            self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False)

        return self.acc

    @staticmethod
    def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')):
        names = copy.deepcopy(names)
        if metrics is None:
            metrics = mm.metrics.motchallenge_metrics
        metrics = copy.deepcopy(metrics)

        mh = mm.metrics.create()
        summary = mh.compute_many(
            accs,
            metrics=metrics,
            names=names,
            generate_overall=True
        )

        return summary

    @staticmethod
    def save_summary(summary, filename):
        import pandas as pd
        with pd.ExcelWriter(filename) as writer:  # ExcelWriter.save() was removed in pandas 2.0
            summary.to_excel(writer)
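Running the evaluator over a few sequences, as a sketch (paths and sequence names are hypothetical; ground truth is expected at `<data_root>/<seq>/gt/gt.txt`):

    seqs = ['MOT16-02', 'MOT16-04']
    accs = [Evaluator('data/MOT16/train', seq, 'mot').eval_file('results/{}.txt'.format(seq))
            for seq in seqs]
    summary = Evaluator.get_summary(accs, seqs)
    print(summary)   # one row per sequence plus an OVERALL row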
@ -0,0 +1,133 @@
import os
from typing import Dict
import numpy as np

# from utils.log import get_logger


def write_results(filename, results, data_type):
    if data_type == 'mot':
        save_format = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n'
    elif data_type == 'kitti':
        save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n'
    else:
        raise ValueError(data_type)

    with open(filename, 'w') as f:
        for frame_id, tlwhs, track_ids in results:
            if data_type == 'kitti':
                frame_id -= 1
            for tlwh, track_id in zip(tlwhs, track_ids):
                if track_id < 0:
                    continue
                x1, y1, w, h = tlwh
                x2, y2 = x1 + w, y1 + h
                line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h)
                f.write(line)


# def write_results(filename, results_dict: Dict, data_type: str):
#     if not filename:
#         return
#     path = os.path.dirname(filename)
#     if not os.path.exists(path):
#         os.makedirs(path)

#     if data_type in ('mot', 'mcmot', 'lab'):
#         save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n'
#     elif data_type == 'kitti':
#         save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n'
#     else:
#         raise ValueError(data_type)

#     with open(filename, 'w') as f:
#         for frame_id, frame_data in results_dict.items():
#             if data_type == 'kitti':
#                 frame_id -= 1
#             for tlwh, track_id in frame_data:
#                 if track_id < 0:
#                     continue
#                 x1, y1, w, h = tlwh
#                 x2, y2 = x1 + w, y1 + h
#                 line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0)
#                 f.write(line)
#     logger.info('Save results to {}'.format(filename))


def read_results(filename, data_type: str, is_gt=False, is_ignore=False):
    if data_type in ('mot', 'lab'):
        read_fun = read_mot_results
    else:
        raise ValueError('Unknown data type: {}'.format(data_type))

    return read_fun(filename, is_gt, is_ignore)


"""
labels={'ped', ...          % 1
'person_on_vhcl', ...       % 2
'car', ...                  % 3
'bicycle', ...              % 4
'mbike', ...                % 5
'non_mot_vhcl', ...         % 6
'static_person', ...        % 7
'distractor', ...           % 8
'occluder', ...             % 9
'occluder_on_grnd', ...     % 10
'occluder_full', ...        % 11
'reflection', ...           % 12
'crowd' ...                 % 13
};
"""


def read_mot_results(filename, is_gt, is_ignore):
    valid_labels = {1}
    ignore_labels = {2, 7, 8, 12}
    results_dict = dict()
    if os.path.isfile(filename):
        with open(filename, 'r') as f:
            for line in f:
                linelist = line.split(',')
                if len(linelist) < 7:
                    continue
                fid = int(linelist[0])
                if fid < 1:
                    continue
                results_dict.setdefault(fid, list())

                if is_gt:
                    if 'MOT16-' in filename or 'MOT17-' in filename:
                        label = int(float(linelist[7]))
                        mark = int(float(linelist[6]))
                        if mark == 0 or label not in valid_labels:
                            continue
                    score = 1
                elif is_ignore:
                    if 'MOT16-' in filename or 'MOT17-' in filename:
                        label = int(float(linelist[7]))
                        vis_ratio = float(linelist[8])
                        if label not in ignore_labels and vis_ratio >= 0:
                            continue
                    else:
                        continue
                    score = 1
                else:
                    score = float(linelist[6])

                tlwh = tuple(map(float, linelist[2:6]))
                target_id = int(linelist[1])

                results_dict[fid].append((tlwh, target_id, score))

    return results_dict


def unzip_objs(objs):
    if len(objs) > 0:
        tlwhs, ids, scores = zip(*objs)
    else:
        tlwhs, ids, scores = [], [], []
    tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)

    return tlwhs, ids, scores
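For reference, a sketch of what `write_results` emits in 'mot' format (values hypothetical):

    results = [(1, [(50.0, 60.0, 70.0, 140.0)], [7])]   # (frame_id, tlwhs, track_ids)
    write_results('MOT16-02.txt', results, data_type='mot')
    # the file now holds: 1,7,50.0,60.0,70.0,140.0,-1,-1,-1,-1
    # i.e. frame, track id, top-left x/y, width, height, then placeholder columns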
@ -0,0 +1,383 @@
"""
References:
    https://medium.com/analytics-vidhya/creating-a-custom-logging-mechanism-for-real-time-object-detection-using-tdd-4ca2cfcd0a2f
"""
import json
from os import makedirs
from os.path import exists, join
from datetime import datetime


class JsonMeta(object):
    HOURS = 3
    MINUTES = 59
    SECONDS = 59
    PATH_TO_SAVE = 'LOGS'
    DEFAULT_FILE_NAME = 'remaining'


class BaseJsonLogger(object):
    """
    This is the base class that returns its own __dict__;
    it also returns the dicts of objects held in attributes that are list instances.

    """

    def dic(self):
        # returns dicts of objects
        out = {}
        for k, v in self.__dict__.items():
            if hasattr(v, 'dic'):
                out[k] = v.dic()
            elif isinstance(v, list):
                out[k] = self.list(v)
            else:
                out[k] = v
        return out

    @staticmethod
    def list(values):
        # applies the dic method on items in the list
        return [v.dic() if hasattr(v, 'dic') else v for v in values]


class Label(BaseJsonLogger):
    """
    For each bounding box there are various categories with confidences. The Label class keeps track of that information.
    """

    def __init__(self, category: str, confidence: float):
        self.category = category
        self.confidence = confidence


class Bbox(BaseJsonLogger):
    """
    This module stores the information for each bounding box and makes it available to JsonParser.
    Attributes:
        labels (list): List of Label objects.
        top (int):
        left (int):
        width (int):
        height (int):

    Args:
        bbox_id (float):
        top (int):
        left (int):
        width (int):
        height (int):

    References:
        Check the Label class for better understanding.

    """

    def __init__(self, bbox_id, top, left, width, height):
        self.labels = []
        self.bbox_id = bbox_id
        self.top = top
        self.left = left
        self.width = width
        self.height = height

    def add_label(self, category, confidence):
        # adds category and confidence only if top_k is not exceeded.
        self.labels.append(Label(category, confidence))

    def labels_full(self, value):
        return len(self.labels) == value


class Frame(BaseJsonLogger):
    """
    This module stores the information for each frame and makes it available to JsonParser.
    Attributes:
        timestamp (float): The elapsed time of the captured frame
        frame_id (int): The frame number of the captured video
        bboxes (list of Bbox objects): Stores the list of bbox objects.

    References:
        Check the Bbox class for more information.

    Args:
        timestamp (float):
        frame_id (int):

    """

    def __init__(self, frame_id: int, timestamp: float = None):
        self.frame_id = frame_id
        self.timestamp = timestamp
        self.bboxes = []

    def add_bbox(self, bbox_id: int, top: int, left: int, width: int, height: int):
        bboxes_ids = [bbox.bbox_id for bbox in self.bboxes]
        if bbox_id not in bboxes_ids:
            self.bboxes.append(Bbox(bbox_id, top, left, width, height))
        else:
            raise ValueError("Frame with id: {} already has a Bbox with id: {}".format(self.frame_id, bbox_id))

    def add_label_to_bbox(self, bbox_id: int, category: str, confidence: float):
        bboxes = {bbox.bbox_id: bbox for bbox in self.bboxes}  # Bbox stores its id as bbox_id, not id
        if bbox_id in bboxes.keys():
            res = bboxes.get(bbox_id)
            res.add_label(category, confidence)
        else:
            raise ValueError('the bbox with id: {} does not exist!'.format(bbox_id))


class BboxToJsonLogger(BaseJsonLogger):
    """
    This module is designed to automate the task of logging jsons. An example json is used
    to show the contents of the json file briefly.
    Example:
        {
          "video_details": {
            "frame_width": 1920,
            "frame_height": 1080,
            "frame_rate": 20,
            "video_name": "/home/gpu/codes/MSD/pedestrian_2/project/public/camera1.avi"
          },
          "frames": [
            {
              "frame_id": 329,
              "timestamp": 3365.1254,
              "bboxes": [
                {
                  "labels": [
                    {
                      "category": "pedestrian",
                      "confidence": 0.9
                    }
                  ],
                  "bbox_id": 0,
                  "top": 1257,
                  "left": 138,
                  "width": 68,
                  "height": 109
                }
              ]
            }]
        }

    Attributes:
        frames (dict): a dictionary that maps each frame_id to its json attributes.
        video_details (dict): information about the video file.
        top_k_labels (int): the allowed number of labels per bbox.
        start_time (datetime object): used to automate the json output by time.

    Args:
        top_k_labels (int): the allowed number of labels per bbox.

    """

    def __init__(self, top_k_labels: int = 1):
        self.frames = {}
        self.video_details = dict(frame_width=None, frame_height=None, frame_rate=None,
                                  video_name=None)
        self.top_k_labels = top_k_labels
        self.start_time = datetime.now()

    def set_top_k(self, value):
        self.top_k_labels = value

    def frame_exists(self, frame_id: int) -> bool:
        """
        Args:
            frame_id (int):

        Returns:
            bool: true if frame_id is recognized
        """
        return frame_id in self.frames.keys()

    def add_frame(self, frame_id: int, timestamp: float = None) -> None:
        """
        Args:
            frame_id (int):
            timestamp (float): opencv captured frame time property

        Raises:
            ValueError: if frame_id already exists in the class frames attribute

        Returns:
            None

        """
        if not self.frame_exists(frame_id):
            self.frames[frame_id] = Frame(frame_id, timestamp)
        else:
            raise ValueError("Frame id: {} already exists".format(frame_id))

    def bbox_exists(self, frame_id: int, bbox_id: int) -> bool:
        """
        Args:
            frame_id:
            bbox_id:

        Returns:
            bool: True if the bbox exists in the frame's bbox list
        """
        bboxes = []
        if self.frame_exists(frame_id=frame_id):
            bboxes = [bbox.bbox_id for bbox in self.frames[frame_id].bboxes]
        return bbox_id in bboxes

    def find_bbox(self, frame_id: int, bbox_id: int):
        """

        Args:
            frame_id:
            bbox_id:

        Returns:
            bbox_id (int):

        Raises:
            ValueError: if bbox_id does not exist in the bbox list of the specific frame.
        """
        if not self.bbox_exists(frame_id, bbox_id):
            raise ValueError("frame with id: {} does not contain bbox with id: {}".format(frame_id, bbox_id))
        bboxes = {bbox.bbox_id: bbox for bbox in self.frames[frame_id].bboxes}
        return bboxes.get(bbox_id)

    def add_bbox_to_frame(self, frame_id: int, bbox_id: int, top: int, left: int, width: int, height: int) -> None:
        """

        Args:
            frame_id (int):
            bbox_id (int):
            top (int):
            left (int):
            width (int):
            height (int):

        Returns:
            None

        Raises:
            ValueError: if bbox_id already exists in the frame information with frame_id
            ValueError: if frame_id does not exist in the frames attribute
        """
        if self.frame_exists(frame_id):
            frame = self.frames[frame_id]
            if not self.bbox_exists(frame_id, bbox_id):
                frame.add_bbox(bbox_id, top, left, width, height)
            else:
                raise ValueError(
                    "frame with frame_id: {} already contains the bbox with id: {} ".format(frame_id, bbox_id))
        else:
            raise ValueError("frame with frame_id: {} does not exist".format(frame_id))

    def add_label_to_bbox(self, frame_id: int, bbox_id: int, category: str, confidence: float):
        """
        Args:
            frame_id:
            bbox_id:
            category:
            confidence: the confidence value returned from yolo detection

        Returns:
            None

        Raises:
            ValueError: if the labels quota (top_k_labels) is exceeded.
        """
        bbox = self.find_bbox(frame_id, bbox_id)
        if not bbox.labels_full(self.top_k_labels):
            bbox.add_label(category, confidence)
        else:
            raise ValueError("labels in frame_id: {}, bbox_id: {} are full".format(frame_id, bbox_id))

    def add_video_details(self, frame_width: int = None, frame_height: int = None, frame_rate: int = None,
                          video_name: str = None):
        self.video_details['frame_width'] = frame_width
        self.video_details['frame_height'] = frame_height
        self.video_details['frame_rate'] = frame_rate
        self.video_details['video_name'] = video_name

    def output(self):
        output = {'video_details': self.video_details}
        result = list(self.frames.values())
        output['frames'] = [item.dic() for item in result]
        return output

    def json_output(self, output_name):
        """
        Args:
            output_name:

        Returns:
            None

        Notes:
            It creates the json output with the `output_name` name.
        """
        if not output_name.endswith('.json'):
            output_name += '.json'
        with open(output_name, 'w') as file:  # the with-block closes the file; no explicit close() needed
            json.dump(self.output(), file)

    def set_start(self):
        self.start_time = datetime.now()

    def schedule_output_by_time(self, output_dir=JsonMeta.PATH_TO_SAVE, hours: int = 0, minutes: int = 0,
                                seconds: int = 60) -> None:
        """
        Notes:
            Creates the folder and then periodically stores the jsons at that address.

        Args:
            output_dir (str): the directory where output files will be stored
            hours (int):
            minutes (int):
            seconds (int):

        Returns:
            None

        """
        end = datetime.now()
        interval = 0
        interval += abs(min([hours, JsonMeta.HOURS]) * 3600)
        interval += abs(min([minutes, JsonMeta.MINUTES]) * 60)
        interval += abs(min([seconds, JsonMeta.SECONDS]))
        diff = (end - self.start_time).seconds

        if diff > interval:
            output_name = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '.json'
            if not exists(output_dir):
                makedirs(output_dir)
            output = join(output_dir, output_name)
            self.json_output(output_name=output)
            self.frames = {}
            self.start_time = datetime.now()

    def schedule_output_by_frames(self, frames_quota, frame_counter, output_dir=JsonMeta.PATH_TO_SAVE):
        """
        saves as the number of frames quota increases higher.
        :param frames_quota:
        :param frame_counter:
        :param output_dir:
        :return:
        """
        pass

    def flush(self, output_dir):
        """
        Notes:
            We use this function to output jsons whenever possible,
            e.g. at the moment we exit the while loop of opencv.

        Args:
            output_dir:

        Returns:
            None

        """
        filename = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '-remaining.json'
        output = join(output_dir, filename)
        self.json_output(output_name=output)
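An end-to-end sketch of the logger, using the same values as the docstring example above:

    logger = BboxToJsonLogger(top_k_labels=1)
    logger.add_video_details(frame_width=1920, frame_height=1080, frame_rate=20,
                             video_name='camera1.avi')
    logger.add_frame(frame_id=329, timestamp=3365.1254)
    logger.add_bbox_to_frame(329, bbox_id=0, top=1257, left=138, width=68, height=109)
    logger.add_label_to_bbox(329, bbox_id=0, category='pedestrian', confidence=0.9)
    logger.json_output('camera1_log')   # writes camera1_log.json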
@ -0,0 +1,17 @@
import logging


def get_logger(name='root'):
    formatter = logging.Formatter(
        # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s')
        fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')

    handler = logging.StreamHandler()
    handler.setFormatter(formatter)

    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    if not logger.handlers:  # guard against adding duplicate handlers on repeated calls
        logger.addHandler(handler)
    return logger
@ -0,0 +1,38 @@
import os
import yaml
from easydict import EasyDict as edict


class YamlParser(edict):
    """
    This is a yaml parser based on EasyDict.
    """

    def __init__(self, cfg_dict=None, config_file=None):
        if cfg_dict is None:
            cfg_dict = {}

        if config_file is not None:
            assert (os.path.isfile(config_file))
            with open(config_file, 'r') as fo:
                cfg_dict.update(yaml.safe_load(fo))  # yaml.load without a Loader is deprecated and unsafe

        super(YamlParser, self).__init__(cfg_dict)

    def merge_from_file(self, config_file):
        with open(config_file, 'r') as fo:
            self.update(yaml.safe_load(fo))

    def merge_from_dict(self, config_dict):
        self.update(config_dict)


def get_config(config_file=None):
    return YamlParser(config_file=config_file)


if __name__ == "__main__":
    cfg = YamlParser(config_file="../configs/yolov3.yaml")
    cfg.merge_from_file("../configs/deep_sort.yaml")

    import ipdb; ipdb.set_trace()
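Typical usage, as a sketch (the key below is assumed to exist in the merged YAML):

    cfg = get_config()
    cfg.merge_from_file('deep_sort/configs/deep_sort.yaml')
    print(cfg.DEEPSORT.MAX_AGE)            # attribute access courtesy of EasyDict
    cfg.merge_from_dict({'USE_CUDA': True})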
@ -0,0 +1,39 @@
from functools import wraps
from time import time


def is_video(ext: str):
    """
    Returns true if ext exists in
    allowed_exts for video files.

    Args:
        ext:

    Returns:

    """
    allowed_exts = ('.mp4', '.webm', '.ogg', '.avi', '.wmv', '.mkv', '.3gp')
    return any((ext.endswith(x) for x in allowed_exts))


def tik_tok(func):
    """
    Keeps track of time for each process.
    Args:
        func:

    Returns:

    """
    @wraps(func)
    def _time_it(*args, **kwargs):
        start = time()
        try:
            return func(*args, **kwargs)
        finally:
            end_ = time()
            print("time: {:.03f}s, fps: {:.03f}".format(end_ - start, 1 / (end_ - start)))

    return _time_it
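A usage sketch for the decorator (the decorated function is made up):

    @tik_tok
    def process(frame):
        from time import sleep
        sleep(0.1)       # stand-in for detection + tracking work
        return frame

    process(None)        # prints e.g. "time: 0.100s, fps: 9.998"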
@ -0,0 +1,54 @@
from AIDetector_pytorch import Detector
import imutils
import cv2


def main():

    func_status = {}
    func_status['headpose'] = None

    name = 'demo'

    det = Detector()
    cap = cv2.VideoCapture('E:/视频/行人监控/test01.mp4')
    fps = int(cap.get(cv2.CAP_PROP_FPS))  # property index 5 is CAP_PROP_FPS
    print('fps:', fps)
    t = int(1000 / fps)

    size = None
    videoWriter = None

    while True:

        # try:
        _, im = cap.read()
        if im is None:
            break

        result = det.feedCap(im, func_status)
        result = result['frame']
        result = imutils.resize(result, height=500)
        if videoWriter is None:
            fourcc = cv2.VideoWriter_fourcc(
                'm', 'p', '4', 'v')  # opencv3.0
            videoWriter = cv2.VideoWriter(
                'result.mp4', fourcc, fps, (result.shape[1], result.shape[0]))

        videoWriter.write(result)
        cv2.imshow(name, result)
        cv2.waitKey(t)

        if cv2.getWindowProperty(name, cv2.WND_PROP_AUTOSIZE) < 1:
            # exit when the user closes the window (clicks the X)
            break
        # except Exception as e:
        #     print(e)
        #     break

    cap.release()
    videoWriter.release()
    cv2.destroyAllWindows()


if __name__ == '__main__':

    main()
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,114 @@
# This file contains modules common to various models

import math

import torch
import torch.nn as nn

from utils.general import non_max_suppression


def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p


def DWConv(c1, c2, k=1, s=1, act=True):
    # Depthwise convolution
    return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)


class Conv(nn.Module):
    # Standard convolution
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Conv, self).__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.Hardswish() if act else nn.Identity()

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def fuseforward(self, x):
        return self.act(self.conv(x))


class Bottleneck(nn.Module):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super(Bottleneck, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class BottleneckCSP(nn.Module):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(BottleneckCSP, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))


class SPP(nn.Module):
    # Spatial pyramid pooling layer used in YOLOv3-SPP
    def __init__(self, c1, c2, k=(5, 9, 13)):
        super(SPP, self).__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        x = self.cv1(x)
        return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))


class Focus(nn.Module):
    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Focus, self).__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))


class Concat(nn.Module):
    # Concatenate a list of tensors along dimension
    def __init__(self, dimension=1):
        super(Concat, self).__init__()
        self.d = dimension

    def forward(self, x):
        return torch.cat(x, self.d)


class NMS(nn.Module):
    # Non-Maximum Suppression (NMS) module
    conf = 0.25  # confidence threshold
    iou = 0.45  # IoU threshold
    classes = None  # (optional list) filter by class

    def __init__(self):
        super(NMS, self).__init__()

    def forward(self, x):
        return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)
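A quick shape sanity check for these blocks (a sketch; the channel widths are arbitrary):

    import torch

    x = torch.randn(1, 3, 64, 64)
    y = Focus(3, 32, k=3)(x)            # space-to-depth then conv: (1, 32, 32, 32)
    y = BottleneckCSP(32, 64, n=2)(y)   # channels grow, spatial size kept
    y = SPP(64, 64)(y)                  # multi-scale pooling, shape preserved
    print(y.shape)                      # torch.Size([1, 64, 32, 32])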
@ -0,0 +1,152 @@
# This file contains experimental modules

import numpy as np
import torch
import torch.nn as nn

from models.common import Conv, DWConv
from utils.google_utils import attempt_download


class CrossConv(nn.Module):
    # Cross Convolution Downsample
    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
        # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
        super(CrossConv, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, (1, k), (1, s))
        self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class C3(nn.Module):
    # Cross Convolution CSP
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super(C3, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.LeakyReLU(0.1, inplace=True)
        self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))


class Sum(nn.Module):
    # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
    def __init__(self, n, weight=False):  # n: number of inputs
        super(Sum, self).__init__()
        self.weight = weight  # apply weights boolean
        self.iter = range(n - 1)  # iter object
        if weight:
            self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True)  # layer weights

    def forward(self, x):
        y = x[0]  # no weight
        if self.weight:
            w = torch.sigmoid(self.w) * 2
            for i in self.iter:
                y = y + x[i + 1] * w[i]
        else:
            for i in self.iter:
                y = y + x[i + 1]
        return y


class GhostConv(nn.Module):
    # Ghost Convolution https://github.com/huawei-noah/ghostnet
    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
        super(GhostConv, self).__init__()
        c_ = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, k, s, None, g, act)
        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)

    def forward(self, x):
        y = self.cv1(x)
        return torch.cat([y, self.cv2(y)], 1)


class GhostBottleneck(nn.Module):
    # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
    def __init__(self, c1, c2, k, s):
        super(GhostBottleneck, self).__init__()
        c_ = c2 // 2
        self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1),  # pw
                                  DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
                                  GhostConv(c_, c2, 1, 1, act=False))  # pw-linear
        self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
                                      Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()

    def forward(self, x):
        return self.conv(x) + self.shortcut(x)


class MixConv2d(nn.Module):
    # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
        super(MixConv2d, self).__init__()
        groups = len(k)
        if equal_ch:  # equal c_ per group
            i = torch.linspace(0, groups - 1E-6, c2).floor()  # c2 indices
            c_ = [(i == g).sum() for g in range(groups)]  # intermediate channels
        else:  # equal weight.numel() per group
            b = [c2] + [0] * groups
            a = np.eye(groups + 1, groups, k=-1)
            a -= np.roll(a, 1, axis=1)
            a *= np.array(k) ** 2
            a[0] = 1
            c_ = np.linalg.lstsq(a, b, rcond=None)[0].round()  # solve for equal weight indices, ax = b

        self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.LeakyReLU(0.1, inplace=True)

    def forward(self, x):
        return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))


class Ensemble(nn.ModuleList):
    # Ensemble of models
    def __init__(self):
        super(Ensemble, self).__init__()

    def forward(self, x, augment=False):
        y = []
        for module in self:
            y.append(module(x, augment)[0])
        # y = torch.stack(y).max(0)[0]  # max ensemble
        # y = torch.cat(y, 1)  # nms ensemble
        y = torch.stack(y).mean(0)  # mean ensemble
        return y, None  # inference, train output


def attempt_load(weights, map_location=None):
    # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
    model = Ensemble()
    for w in weights if isinstance(weights, list) else [weights]:
        attempt_download(w)
        model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval())  # load FP32 model

    # Compatibility updates
    for m in model.modules():
        if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
            m.inplace = True  # pytorch 1.7.0 compatibility
        elif type(m) is Conv:
            m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility

    if len(model) == 1:
        return model[-1]  # return model
    else:
        print('Ensemble created with %s\n' % weights)
        for k in ['names', 'stride']:
            setattr(model, k, getattr(model[-1], k))
        return model  # return ensemble
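Loading weights through `attempt_load`, as a sketch (file paths are hypothetical; a list of checkpoints yields a mean-averaged `Ensemble`):

    model = attempt_load('weights/yolov5m.pt', map_location='cpu')  # single model, fused, in eval mode
    ensemble = attempt_load(['weights/a.pt', 'weights/b.pt'])       # Ensemble: outputs are averaged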
@ -0,0 +1,244 @@
import math
import sys
from copy import deepcopy
from pathlib import Path

sys.path.append('./')  # to run '$ python *.py' files in subdirectories

import torch
import torch.nn as nn

from models.common import Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, Concat, NMS
from models.experimental import MixConv2d, CrossConv, C3
from utils.autoanchor import check_anchor_order
from utils.general import make_divisible
from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights

thop = None  # optional FLOPS profiler is not bundled here; profiling falls back to 0


class Detect(nn.Module):
    stride = None  # strides computed during build
    export = False  # onnx export

    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
        super(Detect, self).__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.zeros(1)] * self.nl  # init grid
        a = torch.tensor(anchors).float().view(self.nl, -1, 2)
        self.register_buffer('anchors', a)  # shape(nl,na,2)
        self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv

    def forward(self, x):
        # x = x.copy()  # for profiling
        z = []  # inference output
        self.training |= self.export
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

                y = x[i].sigmoid()
                y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i]  # xy
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                z.append(y.view(bs, -1, self.no))

        return x if self.training else (torch.cat(z, 1), x)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()


class Model(nn.Module):
    def __init__(self, cfg='yolov5m.yaml', ch=3, nc=None):  # model, input channels, number of classes
        super(Model, self).__init__()
        if isinstance(cfg, dict):
            self.yaml = cfg  # model dict
        else:  # is *.yaml
            import yaml  # for torch hub
            self.yaml_file = Path(cfg).name
            with open(cfg) as f:
                self.yaml = yaml.load(f, Loader=yaml.FullLoader)  # model dict

        # Define model
        if nc and nc != self.yaml['nc']:
            self.yaml['nc'] = nc  # override yaml value
        self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist, ch_out
        # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])

        # Build strides, anchors
        m = self.model[-1]  # Detect()
        if isinstance(m, Detect):
            s = 128  # 2x min stride
            m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])  # forward
            m.anchors /= m.stride.view(-1, 1, 1)
            check_anchor_order(m)
            self.stride = m.stride
            self._initialize_biases()  # only run once
            # print('Strides: %s' % m.stride.tolist())

        # Init weights, biases
        initialize_weights(self)
        self.info()

    def forward(self, x, augment=False, profile=False):
        if augment:
            img_size = x.shape[-2:]  # height, width
            s = [1, 0.83, 0.67]  # scales
            f = [None, 3, None]  # flips (2-ud, 3-lr)
            y = []  # outputs
            for si, fi in zip(s, f):
                xi = scale_img(x.flip(fi) if fi else x, si)
                yi = self.forward_once(xi)[0]  # forward
                # cv2.imwrite('img%g.jpg' % s, 255 * xi[0].numpy().transpose((1, 2, 0))[:, :, ::-1])  # save
                yi[..., :4] /= si  # de-scale
                if fi == 2:
                    yi[..., 1] = img_size[0] - yi[..., 1]  # de-flip ud
                elif fi == 3:
                    yi[..., 0] = img_size[1] - yi[..., 0]  # de-flip lr
                y.append(yi)
            return torch.cat(y, 1), None  # augmented inference, train
        else:
            return self.forward_once(x, profile)  # single-scale inference, train

    def forward_once(self, x, profile=False):
        y, dt = [], []  # outputs
        for m in self.model:
            if m.f != -1:  # if not from previous layer
                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers

            if profile:
                o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 if thop else 0  # FLOPS
                t = time_synchronized()
                for _ in range(10):
                    _ = m(x)
                dt.append((time_synchronized() - t) * 100)
                print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type))

            x = m(x)  # run
            y.append(x if m.i in self.save else None)  # save output

        if profile:
            print('%.1fms total' % sum(dt))
        return x

    def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is class frequency
        # https://arxiv.org/abs/1708.02002 section 3.3
        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
        m = self.model[-1]  # Detect() module
        for mi, s in zip(m.m, m.stride):  # from
            b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
            b[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
            b[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
            mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)

    def _print_biases(self):
        m = self.model[-1]  # Detect() module
        for mi in m.m:  # from
            b = mi.bias.detach().view(m.na, -1).T  # conv.bias(255) to (3,85)
            print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))

    # def _print_weights(self):
    #     for m in self.model.modules():
    #         if type(m) is Bottleneck:
    #             print('%10.3g' % (m.w.detach().sigmoid() * 2))  # shortcut weights

    def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
        print('Fusing layers... ')
        for m in self.model.modules():
            if type(m) is Conv and hasattr(m, 'bn'):
                m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
                delattr(m, 'bn')  # remove batchnorm
                m.forward = m.fuseforward  # update forward
        self.info()
        return self

    def nms(self, mode=True):  # add or remove NMS module
        present = type(self.model[-1]) is NMS  # last layer is NMS
        if mode and not present:
            print('Adding NMS... ')
            m = NMS()  # module
            m.f = -1  # from
            m.i = self.model[-1].i + 1  # index
            self.model.add_module(name='%s' % m.i, module=m)  # add
            self.eval()
        elif not mode and present:
            print('Removing NMS... ')
            self.model = self.model[:-1]  # remove
        return self

    def info(self, verbose=False, img_size=640):  # print model information
        model_info(self, verbose, img_size)


def parse_model(d, ch):  # model_dict, input_channels(3)
    anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            try:
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
            except Exception:  # a bare except here would also swallow KeyboardInterrupt
                pass

        n = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in [Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
            c1, c2 = ch[f], args[0]

            # Normal
            # if i > 0 and args[0] != no:  # channel expansion factor
            #     ex = 1.75  # exponential (default 2.0)
            #     e = math.log(c2 / ch[1]) / math.log(2)
            #     c2 = int(ch[1] * ex ** e)
            # if m != Focus:

            c2 = make_divisible(c2 * gw, 8) if c2 != no else c2

            # Experimental
            # if i > 0 and args[0] != no:  # channel expansion factor
            #     ex = 1 + gw  # exponential (default 2.0)
            #     ch1 = 32  # ch[1]
            #     e = math.log(c2 / ch1) / math.log(2)  # level 1-n
            #     c2 = int(ch1 * ex ** e)
            # if m != Focus:
            #     c2 = make_divisible(c2, 8) if c2 != no else c2

            args = [c1, c2, *args[1:]]
            if m in [BottleneckCSP, C3]:
                args.insert(2, n)
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum([ch[-1 if x == -1 else x + 1] for x in f])
        elif m is Detect:
            args.append([ch[x + 1] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        np = sum([x.numel() for x in m_.parameters()])  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)
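Building and running the model against the config that follows, as a sketch (`nc=8` matches that file; weights here are random):

    import torch

    model = Model(cfg='yolov5m.yaml', ch=3, nc=8).eval()
    pred, _ = model(torch.zeros(1, 3, 640, 640))  # eval mode returns (decoded preds, raw maps)
    print(pred.shape)                             # (1, anchors over all grids, nc + 5)
    model.fuse()                                  # fold BatchNorm into Conv for faster inference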
@ -0,0 +1,48 @@
# parameters
nc: 8  # number of classes
depth_multiple: 0.67  # model depth multiple
width_multiple: 0.75  # layer channel multiple

# anchors
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# YOLOv5 backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, BottleneckCSP, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 9, BottleneckCSP, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, BottleneckCSP, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
   [-1, 1, SPP, [1024, [5, 9, 13]]],
   [-1, 3, BottleneckCSP, [1024, False]],  # 9
  ]

# YOLOv5 head
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 3, BottleneckCSP, [512, False]],  # 13

   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 3, BottleneckCSP, [256, False]],  # 17 (P3/8-small)

   [-1, 1, Conv, [256, 3, 2]],
   [[-1, 14], 1, Concat, [1]],  # cat head P4
   [-1, 3, BottleneckCSP, [512, False]],  # 20 (P4/16-medium)

   [-1, 1, Conv, [512, 3, 2]],
   [[-1, 10], 1, Concat, [1]],  # cat head P5
   [-1, 3, BottleneckCSP, [1024, False]],  # 23 (P5/32-large)

   [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]
@ -0,0 +1,74 @@
from deep_sort.utils.parser import get_config
from deep_sort.deep_sort import DeepSort
import torch
import cv2

palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)
cfg = get_config()
cfg.merge_from_file("deep_sort/configs/deep_sort.yaml")
deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                    max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                    nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                    max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET,
                    use_cuda=True)


def plot_bboxes(image, bboxes, line_thickness=None):
    # Plot tracked bounding boxes (with track IDs) on the image
    tl = line_thickness or round(
        0.002 * (image.shape[0] + image.shape[1]) / 2) + 1  # line/font thickness
    for (x1, y1, x2, y2, cls_id, pos_id) in bboxes:
        if cls_id in ['smoke', 'phone', 'eat']:
            color = (0, 0, 255)
        else:
            color = (0, 255, 0)
        if cls_id == 'eat':
            cls_id = 'eat-drink'
        c1, c2 = (x1, y1), (x2, y2)
        cv2.rectangle(image, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(cls_id, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(image, c1, c2, color, -1, cv2.LINE_AA)  # filled label background
        cv2.putText(image, '{} ID-{}'.format(cls_id, pos_id), (c1[0], c1[1] - 2), 0, tl / 3,
                    [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

    return image


def update_tracker(target_detector, image):

    new_faces = []
    _, bboxes = target_detector.detect(image)

    bbox_xywh = []
    confs = []

    # Adapt detections to the deep sort input format: (center x, center y, width, height)
    for x1, y1, x2, y2, _, conf in bboxes:
        obj = [
            int((x1 + x2) / 2), int((y1 + y2) / 2),
            x2 - x1, y2 - y1
        ]
        bbox_xywh.append(obj)
        confs.append(conf)

    xywhs = torch.Tensor(bbox_xywh)
    confss = torch.Tensor(confs)

    # Pass detections to deepsort
    outputs = deepsort.update(xywhs, confss, image)

    bboxes2draw = []
    face_bboxes = []
    for value in list(outputs):
        x1, y1, x2, y2, track_id = value
        bboxes2draw.append(
            (x1, y1, x2, y2, '', track_id)
        )

    image = plot_bboxes(image, bboxes2draw)

    return image, new_faces, face_bboxes
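
update_tracker discards the class label when it builds bboxes2draw; if per-track trajectories are needed, the deepsort.update output rows (x1, y1, x2, y2, track_id, as unpacked above) can be accumulated directly. A small sketch under that assumption:

# Sketch: accumulate box centers per track id from the deepsort.update output rows.
from collections import defaultdict

trajectories = defaultdict(list)  # track_id -> list of (cx, cy) centers

def record(outputs):
    for x1, y1, x2, y2, track_id in outputs:
        trajectories[track_id].append(((x1 + x2) / 2, (y1 + y2) / 2))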
@ -0,0 +1,50 @@
from tracker import update_tracker
import cv2


class baseDet(object):

    def __init__(self):

        self.img_size = 640
        self.threshold = 0.3
        self.stride = 1

    def build_config(self):

        self.faceTracker = {}
        self.faceClasses = {}
        self.faceLocation1 = {}
        self.faceLocation2 = {}
        self.frameCounter = 0
        self.currentCarID = 0
        self.recorded = []

        self.font = cv2.FONT_HERSHEY_SIMPLEX

    def feedCap(self, im, func_status):

        retDict = {
            'frame': None,
            'faces': None,
            'list_of_ids': None,
            'face_bboxes': []
        }
        self.frameCounter += 1

        im, faces, face_bboxes = update_tracker(self, im)

        retDict['frame'] = im
        retDict['faces'] = faces
        retDict['face_bboxes'] = face_bboxes

        return retDict

    def init_model(self):
        raise NotImplementedError("init_model must be implemented by the subclass.")

    def preprocess(self):
        raise NotImplementedError("preprocess must be implemented by the subclass.")

    def detect(self):
        raise NotImplementedError("detect must be implemented by the subclass.")
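
A minimal sketch of driving feedCap over a video, assuming a concrete baseDet subclass named Detector that implements init_model/preprocess/detect (the class name and video path here are hypothetical):

# Sketch: process a video through feedCap (Detector and 'demo.mp4' are assumptions, see above).
import cv2

det = Detector()                        # hypothetical baseDet subclass
cap = cv2.VideoCapture('demo.mp4')      # hypothetical input video
while True:
    ok, frame = cap.read()
    if not ok:
        break
    result = det.feedCap(frame, func_status=None)  # func_status is accepted but unused here
    cv2.imshow('tracking', result['frame'])
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()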
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,72 @@
# Activation functions

import torch
import torch.nn as nn
import torch.nn.functional as F


# Swish https://arxiv.org/pdf/1905.02244.pdf ---------------------------------------------------------------------------
class Swish(nn.Module):
    @staticmethod
    def forward(x):
        return x * torch.sigmoid(x)


class Hardswish(nn.Module):  # export-friendly version of nn.Hardswish()
    @staticmethod
    def forward(x):
        # return x * F.hardsigmoid(x)  # for torchscript and CoreML
        return x * F.hardtanh(x + 3, 0., 6.) / 6.  # for torchscript, CoreML and ONNX


class MemoryEfficientSwish(nn.Module):
    class F(torch.autograd.Function):
        @staticmethod
        def forward(ctx, x):
            ctx.save_for_backward(x)
            return x * torch.sigmoid(x)

        @staticmethod
        def backward(ctx, grad_output):
            x = ctx.saved_tensors[0]
            sx = torch.sigmoid(x)
            return grad_output * (sx * (1 + x * (1 - sx)))

    def forward(self, x):
        return self.F.apply(x)


# Mish https://github.com/digantamisra98/Mish --------------------------------------------------------------------------
class Mish(nn.Module):
    @staticmethod
    def forward(x):
        return x * F.softplus(x).tanh()


class MemoryEfficientMish(nn.Module):
    class F(torch.autograd.Function):
        @staticmethod
        def forward(ctx, x):
            ctx.save_for_backward(x)
            return x.mul(torch.tanh(F.softplus(x)))  # x * tanh(ln(1 + exp(x)))

        @staticmethod
        def backward(ctx, grad_output):
            x = ctx.saved_tensors[0]
            sx = torch.sigmoid(x)
            fx = F.softplus(x).tanh()
            return grad_output * (fx + x * sx * (1 - fx * fx))

    def forward(self, x):
        return self.F.apply(x)


# FReLU https://arxiv.org/abs/2007.11824 -------------------------------------------------------------------------------
class FReLU(nn.Module):
    def __init__(self, c1, k=3):  # ch_in, kernel
        super().__init__()
        self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1)
        self.bn = nn.BatchNorm2d(c1)

    def forward(self, x):
        return torch.max(x, self.bn(self.conv(x)))
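
The memory-efficient variants recompute the activation in backward from the saved input instead of storing intermediates. A quick sanity-check sketch (run in the same module as the classes above) that values and gradients match the plain version:

# Sketch: MemoryEfficientSwish should match Swish in both value and gradient.
import torch

x1 = torch.randn(8, requires_grad=True)
x2 = x1.detach().clone().requires_grad_(True)

Swish.forward(x1).sum().backward()
MemoryEfficientSwish()(x2).sum().backward()

print(torch.allclose(x1.grad, x2.grad, atol=1e-6))  # expected: True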
@ -0,0 +1,151 @@
# Auto-anchor utils

import numpy as np
import torch
import yaml
from scipy.cluster.vq import kmeans
from tqdm import tqdm


def check_anchor_order(m):
    # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
    a = m.anchor_grid.prod(-1).view(-1)  # anchor area
    da = a[-1] - a[0]  # delta a
    ds = m.stride[-1] - m.stride[0]  # delta s
    if da.sign() != ds.sign():  # order mismatch: anchor areas and strides grow in opposite directions
        print('Reversing anchor order')
        m.anchors[:] = m.anchors.flip(0)
        m.anchor_grid[:] = m.anchor_grid.flip(0)


def check_anchors(dataset, model, thr=4.0, imgsz=640):
    # Check anchor fit to data, recompute if necessary
    print('\nAnalyzing anchors... ', end='')
    m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1]  # Detect()
    shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)
    scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1))  # augment scale
    wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float()  # wh

    def metric(k):  # compute metric
        r = wh[:, None] / k[None]
        x = torch.min(r, 1. / r).min(2)[0]  # ratio metric
        best = x.max(1)[0]  # best_x
        aat = (x > 1. / thr).float().sum(1).mean()  # anchors above threshold
        bpr = (best > 1. / thr).float().mean()  # best possible recall
        return bpr, aat

    bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2))
    print('anchors/target = %.2f, Best Possible Recall (BPR) = %.4f' % (aat, bpr), end='')
    if bpr < 0.98:  # threshold to recompute
        print('. Attempting to improve anchors, please wait...')
        na = m.anchor_grid.numel() // 2  # number of anchors
        new_anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)
        new_bpr = metric(new_anchors.reshape(-1, 2))[0]
        if new_bpr > bpr:  # replace anchors
            new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(m.anchors)
            m.anchor_grid[:] = new_anchors.clone().view_as(m.anchor_grid)  # for inference
            m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1)  # loss
            check_anchor_order(m)
            print('New anchors saved to model. Update model *.yaml to use these anchors in the future.')
        else:
            print('Original anchors better than new anchors. Proceeding with original anchors.')
    print('')  # newline


def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
    """ Creates kmeans-evolved anchors from training dataset

        Arguments:
            path: path to dataset *.yaml, or a loaded dataset
            n: number of anchors
            img_size: image size used for training
            thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
            gen: generations to evolve anchors using genetic algorithm
            verbose: print all results

        Return:
            k: kmeans evolved anchors

        Usage:
            from utils.autoanchor import *; _ = kmean_anchors()
    """
    thr = 1. / thr

    def metric(k, wh):  # compute metrics
        r = wh[:, None] / k[None]
        x = torch.min(r, 1. / r).min(2)[0]  # ratio metric
        # x = wh_iou(wh, torch.tensor(k))  # iou metric
        return x, x.max(1)[0]  # x, best_x

    def anchor_fitness(k):  # mutation fitness
        _, best = metric(torch.tensor(k, dtype=torch.float32), wh)
        return (best * (best > thr).float()).mean()  # fitness

    def print_results(k):
        k = k[np.argsort(k.prod(1))]  # sort small to large
        x, best = metric(k, wh0)
        bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n  # best possible recall, anch > thr
        print('thr=%.2f: %.4f best possible recall, %.2f anchors past thr' % (thr, bpr, aat))
        print('n=%g, img_size=%s, metric_all=%.3f/%.3f-mean/best, past_thr=%.3f-mean: ' %
              (n, img_size, x.mean(), best.mean(), x[x > thr].mean()), end='')
        for i, x in enumerate(k):
            print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n')  # use in *.cfg
        return k

    if isinstance(path, str):  # *.yaml file
        with open(path) as f:
            data_dict = yaml.load(f, Loader=yaml.FullLoader)  # model dict
        from utils.datasets import LoadImagesAndLabels
        dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)
    else:
        dataset = path  # dataset

    # Get label wh
    shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
    wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)])  # wh

    # Filter
    i = (wh0 < 3.0).any(1).sum()
    if i:
        print('WARNING: Extremely small objects found. '
              '%g of %g labels are < 3 pixels in width or height.' % (i, len(wh0)))
    wh = wh0[(wh0 >= 2.0).any(1)]  # filter > 2 pixels

    # Kmeans calculation
    print('Running kmeans for %g anchors on %g points...' % (n, len(wh)))
    s = wh.std(0)  # sigmas for whitening
    k, dist = kmeans(wh / s, n, iter=30)  # points, mean distance
    k *= s
    wh = torch.tensor(wh, dtype=torch.float32)  # filtered
    wh0 = torch.tensor(wh0, dtype=torch.float32)  # unfiltered
    k = print_results(k)

    # Plot
    # k, d = [None] * 20, [None] * 20
    # for i in tqdm(range(1, 21)):
    #     k[i-1], d[i-1] = kmeans(wh / s, i)  # points, mean distance
    # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True)
    # ax = ax.ravel()
    # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')
    # fig, ax = plt.subplots(1, 2, figsize=(14, 7))  # plot wh
    # ax[0].hist(wh[wh[:, 0]<100, 0], 400)
    # ax[1].hist(wh[wh[:, 1]<100, 1], 400)
    # fig.savefig('wh.png', dpi=200)

    # Evolve
    npr = np.random
    f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1  # fitness, generations, mutation prob, sigma
    pbar = tqdm(range(gen), desc='Evolving anchors with Genetic Algorithm')  # progress bar
    for _ in pbar:
        v = np.ones(sh)
        while (v == 1).all():  # mutate until a change occurs (prevent duplicates)
            v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
        kg = (k.copy() * v).clip(min=2.0)
        fg = anchor_fitness(kg)
        if fg > f:
            f, k = fg, kg.copy()
            pbar.desc = 'Evolving anchors with Genetic Algorithm: fitness = %.4f' % f
            if verbose:
                print_results(k)

    return print_results(k)
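
The ratio metric above scores an anchor against a label box by the worse of the width and height ratios; best possible recall (BPR) is the fraction of labels whose best anchor clears 1/thr. A toy illustration with synthetic boxes (a sketch, not repo code):

# Toy sketch of the anchor ratio metric and best-possible-recall (synthetic data).
import torch

k = torch.tensor([[10., 13.], [30., 61.], [116., 90.]])  # anchors (w, h)
wh = torch.rand(1000, 2) * 200 + 2                       # synthetic label sizes

r = wh[:, None] / k[None]                      # per label/anchor w,h ratios
x = torch.min(r, 1. / r).min(2)[0]             # worst-dimension ratio, in (0, 1]
bpr = (x.max(1)[0] > 1. / 4.0).float().mean()  # labels matched by some anchor at thr=4.0
print('BPR = %.3f' % bpr.item())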
@ -0,0 +1,502 @@
# General utils

import glob
import os
import platform
import random
import re
import subprocess
import time
from pathlib import Path

import cv2
import math
import numpy as np
import torch
import torchvision
import yaml

from utils.google_utils import gsutil_getsize
from utils.metrics import fitness
from utils.torch_utils import init_torch_seeds

# Settings
torch.set_printoptions(linewidth=320, precision=5, profile='long')
# format short g, %precision=5
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})
# prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
cv2.setNumThreads(0)


def init_seeds(seed=0):
    random.seed(seed)
    np.random.seed(seed)
    init_torch_seeds(seed)


def get_latest_run(search_dir='.'):
    # Return path to most recent 'last.pt' in /runs (i.e. to --resume from)
    last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
    return max(last_list, key=os.path.getctime) if last_list else ''


def check_git_status():
    # Suggest 'git pull' if repo is out of date
    if platform.system() in ['Linux', 'Darwin'] and not os.path.isfile('/.dockerenv'):
        s = subprocess.check_output(
            'if [ -d .git ]; then git fetch && git status -uno; fi', shell=True).decode('utf-8')
        if 'Your branch is behind' in s:
            print(s[s.find('Your branch is behind'):s.find('\n\n')] + '\n')


def check_img_size(img_size, s=32):
    # Verify img_size is a multiple of stride s
    new_size = make_divisible(img_size, int(s))  # ceil gs-multiple
    if new_size != img_size:
        print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' %
              (img_size, s, new_size))
    return new_size


def check_file(file):
    # Search for file if not found
    if os.path.isfile(file) or file == '':
        return file
    else:
        files = glob.glob('./**/' + file, recursive=True)  # find file
        assert len(files), 'File Not Found: %s' % file  # assert file was found
        assert len(files) == 1, "Multiple files match '%s', specify exact path: %s" % (
            file, files)  # assert unique
        return files[0]  # return file


def check_dataset(data):
    # Download dataset if not found locally
    val, s = data.get('val'), data.get('download')
    if val and len(val):
        val = [Path(x).resolve()
               for x in (val if isinstance(val, list) else [val])]  # val path
        if not all(x.exists() for x in val):
            print('\nWARNING: Dataset not found, nonexistent paths: %s' %
                  [str(x) for x in val if not x.exists()])
            if s and len(s):  # download script
                print('Downloading %s ...' % s)
                if s.startswith('http') and s.endswith('.zip'):  # URL
                    f = Path(s).name  # filename
                    torch.hub.download_url_to_file(s, f)
                    r = os.system('unzip -q %s -d ../ && rm %s' %
                                  (f, f))  # unzip
                else:  # bash script
                    r = os.system(s)
                print('Dataset autodownload %s\n' % ('success' if r == 0 else 'failure'))  # analyze return value
            else:
                raise Exception('Dataset not found.')


def make_divisible(x, divisor):
    # Returns x evenly divisible by divisor
    return math.ceil(x / divisor) * divisor


def labels_to_class_weights(labels, nc=80):
    # Get class weights (inverse frequency) from training labels
    if labels[0] is None:  # no labels loaded
        return torch.Tensor()

    labels = np.concatenate(labels, 0)  # labels.shape = (866643, 5) for COCO
    classes = labels[:, 0].astype(int)  # labels = [class xywh]
    weights = np.bincount(classes, minlength=nc)  # occurrences per class

    # Prepend gridpoint count (for uCE training)
    # gpi = ((320 / 32 * np.array([1, 2, 4])) ** 2 * 3).sum()  # gridpoints per image
    # weights = np.hstack([gpi * len(labels) - weights.sum() * 9, weights * 9]) ** 0.5  # prepend gridpoints to start

    weights[weights == 0] = 1  # replace empty bins with 1
    weights = 1 / weights  # number of targets per class
    weights /= weights.sum()  # normalize
    return torch.from_numpy(weights)


def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)):
    # Produces image weights based on class_weights and image contents
    class_counts = np.array(
        [np.bincount(x[:, 0].astype(int), minlength=nc) for x in labels])
    image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1)
    # index = random.choices(range(n), weights=image_weights, k=1)  # weight image sample
    return image_weights


def coco80_to_coco91_class():  # converts 80-index (val2014) to 91-index (paper)
    # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
    # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
    # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
    # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)]  # darknet to coco
    # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)]  # coco to darknet
    x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
         35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
         64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
    return x


def xyxy2xywh(x):
    # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
    y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
    y[:, 2] = x[:, 2] - x[:, 0]  # width
    y[:, 3] = x[:, 3] - x[:, 1]  # height
    return y


def xywh2xyxy(x):
    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y
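
These two conversions are exact inverses; a quick round-trip check (a sketch run in this module's namespace, not repo code):

# Round-trip sketch: xyxy -> xywh -> xyxy recovers the original boxes.
import numpy as np

boxes = np.array([[10., 20., 50., 80.]])  # x1, y1, x2, y2
back = xywh2xyxy(xyxy2xywh(boxes))
print(np.allclose(boxes, back))           # expected: True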
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    # Rescale coords (xyxy) from img1_shape to img0_shape
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0],
                   img1_shape[1] / img0_shape[1])  # gain = old / new
        pad = ((img1_shape[1] - img0_shape[1] * gain) / 2,
               (img1_shape[0] - img0_shape[0] * gain) / 2)  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords


def clip_coords(boxes, img_shape):
    # Clip xyxy bounding boxes to image shape (height, width)
    boxes[:, 0].clamp_(0, img_shape[1])  # x1
    boxes[:, 1].clamp_(0, img_shape[0])  # y1
    boxes[:, 2].clamp_(0, img_shape[1])  # x2
    boxes[:, 3].clamp_(0, img_shape[0])  # y2


def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-9):
    # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4
    box2 = box2.T

    # Get the coordinates of bounding boxes
    if x1y1x2y2:  # x1, y1, x2, y2 = box1
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:  # transform from xywh to xyxy
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection area
    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
            (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)

    # Union Area
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
    union = w1 * h1 + w2 * h2 - inter + eps

    iou = inter / union
    if GIoU or DIoU or CIoU:
        # convex (smallest enclosing box) width
        cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)
        ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # convex height
        if CIoU or DIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
            c2 = cw ** 2 + ch ** 2 + eps  # convex diagonal squared
            rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 +
                    (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4  # center distance squared
            if DIoU:
                return iou - rho2 / c2  # DIoU
            elif CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
                v = (4 / math.pi ** 2) * \
                    torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
                with torch.no_grad():
                    alpha = v / ((1 + eps) - iou + v)
                return iou - (rho2 / c2 + v * alpha)  # CIoU
        else:  # GIoU https://arxiv.org/pdf/1902.09630.pdf
            c_area = cw * ch + eps  # convex area
            return iou - (c_area - union) / c_area  # GIoU
    else:
        return iou  # IoU
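
A tiny check of bbox_iou on a hand-computable pair (a sketch run in this module's namespace):

# Sketch: IoU of two 2x2 squares offset by (1, 1); inter=1, union=7.
import torch

b1 = torch.tensor([0., 0., 2., 2.])
b2 = torch.tensor([[1., 1., 3., 3.]])
print(bbox_iou(b1, b2))             # expected: ~0.1429 (1/7)
print(bbox_iou(b1, b2, GIoU=True))  # penalized by the enclosing-box term, so < plain IoU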
def box_iou(box1, box2):
    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    """
    Return intersection-over-union (Jaccard index) of boxes.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """

    def box_area(box):
        # box = 4xn
        return (box[2] - box[0]) * (box[3] - box[1])

    area1 = box_area(box1.T)
    area2 = box_area(box2.T)

    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) -
             torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
    # iou = inter / (area1 + area2 - inter)
    return inter / (area1[:, None] + area2 - inter)


def wh_iou(wh1, wh2):
    # Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2
    wh1 = wh1[:, None]  # [N,1,2]
    wh2 = wh2[None]  # [1,M,2]
    inter = torch.min(wh1, wh2).prod(2)  # [N,M]
    # iou = inter / (area1 + area2 - inter)
    return inter / (wh1.prod(2) + wh2.prod(2) - inter)


def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6, classes=None, agnostic=False, labels=()):
    """Performs Non-Maximum Suppression (NMS) on inference results

    Returns:
        detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
    """

    nc = prediction[0].shape[1] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    # (pixels) minimum and maximum box width and height
    min_wh, max_wh = 2, 4096
    max_det = 300  # maximum number of detections per image
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label = nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    output = [torch.zeros(0, 6)] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            l = labels[xi]
            v = torch.zeros((len(l), nc + 5), device=x.device)
            v[:, :4] = l[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(l)), l[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # If none remain process next image
        n = x.shape[0]  # number of boxes
        if not n:
            continue

        # Sort by confidence
        # x = x[x[:, 4].argsort(descending=True)]

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        # boxes (offset by class), scores
        boxes, scores = x[:, :4] + c, x[:, 4]
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            break  # time limit exceeded

    return output
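
A minimal sketch feeding synthetic raw predictions through the function; rows use the (cx, cy, w, h, obj, class scores) layout assumed above:

# Sketch: NMS on two heavily overlapping synthetic detections of the same class.
import torch

pred = torch.zeros(1, 2, 5 + 2)  # 1 image, 2 boxes, 2 classes
pred[0, 0] = torch.tensor([50., 50., 20., 20., 0.9, 0.8, 0.1])  # cx,cy,w,h,obj,cls0,cls1
pred[0, 1] = torch.tensor([52., 52., 20., 20., 0.7, 0.7, 0.1])  # overlaps box 0 with IoU ~0.68

dets = non_max_suppression(pred, conf_thres=0.3, iou_thres=0.5)
print(dets[0])  # expected: one surviving (x1, y1, x2, y2, conf, cls) row, the higher-scoring box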
# from utils.general import *; strip_optimizer()
def strip_optimizer(f='weights/best.pt', s=''):
    # Strip optimizer from 'f' to finalize training, optionally save as 's'
    x = torch.load(f, map_location=torch.device('cpu'))
    x['optimizer'] = None
    x['training_results'] = None
    x['epoch'] = -1
    x['model'].half()  # to FP16
    for p in x['model'].parameters():
        p.requires_grad = False
    torch.save(x, s or f)
    mb = os.path.getsize(s or f) / 1E6  # filesize
    print('Optimizer stripped from %s,%s %.1fMB' %
          (f, (' saved as %s,' % s) if s else '', mb))


def print_mutation(hyp, results, yaml_file='hyp_evolved.yaml', bucket=''):
    # Print mutation results to evolve.txt (for use with train.py --evolve)
    a = '%10s' * len(hyp) % tuple(hyp.keys())  # hyperparam keys
    b = '%10.3g' * len(hyp) % tuple(hyp.values())  # hyperparam values
    # results (P, R, mAP@0.5, mAP@0.5:0.95, val_losses x 3)
    c = '%10.4g' * len(results) % results
    print('\n%s\n%s\nEvolved fitness: %s\n' % (a, b, c))

    if bucket:
        url = 'gs://%s/evolve.txt' % bucket
        if gsutil_getsize(url) > (os.path.getsize('evolve.txt') if os.path.exists('evolve.txt') else 0):
            # download evolve.txt if larger than local
            os.system('gsutil cp %s .' % url)

    with open('evolve.txt', 'a') as f:  # append result
        f.write(c + b + '\n')
    x = np.unique(np.loadtxt('evolve.txt', ndmin=2),
                  axis=0)  # load unique rows
    x = x[np.argsort(-fitness(x))]  # sort
    np.savetxt('evolve.txt', x, '%10.3g')  # save sort by fitness

    # Save yaml
    for i, k in enumerate(hyp.keys()):
        hyp[k] = float(x[0, i + 7])
    with open(yaml_file, 'w') as f:
        results = tuple(x[0, :7])
        # results (P, R, mAP@0.5, mAP@0.5:0.95, val_losses x 3)
        c = '%10.4g' * len(results) % results
        f.write('# Hyperparameter Evolution Results\n# Generations: %g\n# Metrics: ' % len(
            x) + c + '\n\n')
        yaml.dump(hyp, f, sort_keys=False)

    if bucket:
        os.system('gsutil cp evolve.txt %s gs://%s' %
                  (yaml_file, bucket))  # upload


def apply_classifier(x, model, img, im0):
    # Applies a second-stage classifier to YOLO outputs
    im0 = [im0] if isinstance(im0, np.ndarray) else im0
    for i, d in enumerate(x):  # per image
        if d is not None and len(d):
            d = d.clone()

            # Reshape and pad cutouts
            b = xyxy2xywh(d[:, :4])  # boxes
            b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # rectangle to square
            b[:, 2:] = b[:, 2:] * 1.3 + 30  # pad
            d[:, :4] = xywh2xyxy(b).long()

            # Rescale boxes from img_size to im0 size
            scale_coords(img.shape[2:], d[:, :4], im0[i].shape)

            # Classes
            pred_cls1 = d[:, 5].long()
            ims = []
            for j, a in enumerate(d):  # per item
                cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])]
                im = cv2.resize(cutout, (224, 224))  # BGR
                # cv2.imwrite('test%i.jpg' % j, cutout)

                # BGR to RGB, to 3x416x416
                im = im[:, :, ::-1].transpose(2, 0, 1)
                im = np.ascontiguousarray(
                    im, dtype=np.float32)  # uint8 to float32
                im /= 255.0  # 0 - 255 to 0.0 - 1.0
                ims.append(im)

            pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1)  # classifier prediction
            # retain matching class detections
            x[i] = x[i][pred_cls1 == pred_cls2]

    return x


def increment_path(path, exist_ok=True, sep=''):
    # Increment path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ...
    path = Path(path)  # os-agnostic
    if (path.exists() and exist_ok) or (not path.exists()):
        return str(path)
    else:
        dirs = glob.glob(f"{path}{sep}*")  # similar paths
        matches = [re.search(rf"%s{sep}(\d+)" % path.stem, d) for d in dirs]
        i = [int(m.groups()[0]) for m in matches if m]  # indices
        n = max(i) + 1 if i else 2  # increment number
        return f"{path}{sep}{n}"  # update path


def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
    # Resize image to new_shape keeping the aspect ratio, padding the remainder with 'color'
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, 32), np.mod(dh, 32)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(
        img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)
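
A quick sketch of letterbox in use; the input shape is illustrative:

# Sketch: letterbox a 480x640 image toward a 640x640 canvas; with auto=True the
# padding is reduced mod 32, so a 480-high image needs no padding at all.
import numpy as np

img = np.zeros((480, 640, 3), dtype=np.uint8)
out, ratio, (dw, dh) = letterbox(img, new_shape=640)
print(out.shape, ratio, (dw, dh))  # expected: (480, 640, 3), (1.0, 1.0), (0.0, 0.0)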
@ -0,0 +1,122 @@
# Google utils: https://cloud.google.com/storage/docs/reference/libraries

import os
import platform
import subprocess
import time
from pathlib import Path

import torch


def gsutil_getsize(url=''):
    # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du
    s = subprocess.check_output('gsutil du %s' % url, shell=True).decode('utf-8')
    return int(s.split(' ')[0]) if len(s) else 0  # bytes


def attempt_download(weights):
    # Attempt to download pretrained weights if not found locally
    weights = weights.strip().replace("'", '')
    file = Path(weights).name.lower()

    msg = weights + ' missing, try downloading from https://github.com/ultralytics/yolov5/releases/'
    models = ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']  # available models
    redundant = False  # offer second download option

    if file in models and not os.path.isfile(weights):
        # Google Drive
        # d = {'yolov5s.pt': '1R5T6rIyy3lLwgFXNms8whc-387H0tMQO',
        #      'yolov5m.pt': '1vobuEExpWQVpXExsJ2w-Mbf3HJjWkQJr',
        #      'yolov5l.pt': '1hrlqD1Wdei7UT4OgT785BEk1JwnSvNEV',
        #      'yolov5x.pt': '1mM8aZJlWTxOg7BZJvNUMrTnA2AbeCVzS'}
        # r = gdrive_download(id=d[file], name=weights) if file in d else 1
        # if r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6:  # check
        #     return

        try:  # GitHub
            url = 'https://github.com/ultralytics/yolov5/releases/download/v3.1/' + file
            print('Downloading %s to %s...' % (url, weights))
            torch.hub.download_url_to_file(url, weights)
            assert os.path.exists(weights) and os.path.getsize(weights) > 1E6  # check
        except Exception as e:  # GCP
            print('Download error: %s' % e)
            assert redundant, 'No secondary mirror'
            url = 'https://storage.googleapis.com/ultralytics/yolov5/ckpt/' + file
            print('Downloading %s to %s...' % (url, weights))
            r = os.system('curl -L %s -o %s' % (url, weights))  # torch.hub.download_url_to_file(url, weights)
        finally:
            if not (os.path.exists(weights) and os.path.getsize(weights) > 1E6):  # check
                if os.path.exists(weights):
                    os.remove(weights)  # remove partial downloads
                print('ERROR: Download failure: %s' % msg)
            print('')
            return


def gdrive_download(id='1n_oKgR81BJtqk75b00eAjdv03qVCQn2f', name='coco128.zip'):
    # Downloads a file from Google Drive. from utils.google_utils import *; gdrive_download()
    t = time.time()

    print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='')
    if os.path.exists(name):
        os.remove(name)  # remove existing
    if os.path.exists('cookie'):
        os.remove('cookie')

    # Attempt file download
    out = "NUL" if platform.system() == "Windows" else "/dev/null"
    os.system('curl -c ./cookie -s -L "drive.google.com/uc?export=download&id=%s" > %s ' % (id, out))
    if os.path.exists('cookie'):  # large file
        s = 'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm=%s&id=%s" -o %s' % (get_token(), id, name)
    else:  # small file
        s = 'curl -s -L -o %s "drive.google.com/uc?export=download&id=%s"' % (name, id)
    r = os.system(s)  # execute, capture return
    if os.path.exists('cookie'):
        os.remove('cookie')

    # Error check
    if r != 0:
        if os.path.exists(name):
            os.remove(name)  # remove partial
        print('Download error ')  # raise Exception('Download error')
        return r

    # Unzip if archive
    if name.endswith('.zip'):
        print('unzipping... ', end='')
        os.system('unzip -q %s' % name)  # unzip
        os.remove(name)  # remove zip to free space

    print('Done (%.1fs)' % (time.time() - t))
    return r


def get_token(cookie="./cookie"):
    with open(cookie) as f:
        for line in f:
            if "download" in line:
                return line.split()[-1]
    return ""

# def upload_blob(bucket_name, source_file_name, destination_blob_name):
#     # Uploads a file to a bucket
#     # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
#
#     storage_client = storage.Client()
#     bucket = storage_client.get_bucket(bucket_name)
#     blob = bucket.blob(destination_blob_name)
#
#     blob.upload_from_filename(source_file_name)
#
#     print('File {} uploaded to {}.'.format(
#         source_file_name,
#         destination_blob_name))
#
#
# def download_blob(bucket_name, source_blob_name, destination_file_name):
#     # Downloads a blob from a bucket
#     storage_client = storage.Client()
#     bucket = storage_client.get_bucket(bucket_name)
#     blob = bucket.blob(source_blob_name)
#
#     blob.download_to_filename(destination_file_name)
#
#     print('Blob {} downloaded to {}.'.format(
#         source_blob_name,
#         destination_file_name))
@ -0,0 +1,201 @@
# Model validation metrics

from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import torch

from . import general


def fitness(x):
    # Model fitness as a weighted combination of metrics
    w = [0.0, 0.0, 0.1, 0.9]  # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
    return (x[:, :4] * w).sum(1)
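
With those weights, fitness reduces to 0.1 * mAP@0.5 + 0.9 * mAP@0.5:0.95. A one-line check (sketch):

# Sketch: fitness of a row with P=0.8, R=0.7, mAP@0.5=0.6, mAP@0.5:0.95=0.4.
import numpy as np

print(fitness(np.array([[0.8, 0.7, 0.6, 0.4]])))  # 0.1*0.6 + 0.9*0.4 = 0.42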
def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='precision-recall_curve.png', names=[]):
    """ Compute the average precision, given the recall and precision curves.
    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
    # Arguments
        tp: True positives (nparray, nx1 or nx10).
        conf: Objectness value from 0-1 (nparray).
        pred_cls: Predicted object classes (nparray).
        target_cls: True object classes (nparray).
        plot: Plot precision-recall curve at mAP@0.5
        save_dir: Plot save directory
    # Returns
        The average precision as computed in py-faster-rcnn.
    """

    # Sort by objectness
    i = np.argsort(-conf)
    tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]

    # Find unique classes
    unique_classes = np.unique(target_cls)

    # Create Precision-Recall curve and compute AP for each class
    px, py = np.linspace(0, 1, 1000), []  # for plotting
    pr_score = 0.1  # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898
    s = [unique_classes.shape[0], tp.shape[1]]  # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95)
    ap, p, r = np.zeros(s), np.zeros(s), np.zeros(s)
    for ci, c in enumerate(unique_classes):
        i = pred_cls == c
        n_l = (target_cls == c).sum()  # number of labels
        n_p = i.sum()  # number of predictions

        if n_p == 0 or n_l == 0:
            continue
        else:
            # Accumulate FPs and TPs
            fpc = (1 - tp[i]).cumsum(0)
            tpc = tp[i].cumsum(0)

            # Recall
            recall = tpc / (n_l + 1e-16)  # recall curve
            r[ci] = np.interp(-pr_score, -conf[i], recall[:, 0])  # r at pr_score, negative x, xp because xp decreases

            # Precision
            precision = tpc / (tpc + fpc)  # precision curve
            p[ci] = np.interp(-pr_score, -conf[i], precision[:, 0])  # p at pr_score

            # AP from recall-precision curve
            for j in range(tp.shape[1]):
                ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
                if plot and (j == 0):
                    py.append(np.interp(px, mrec, mpre))  # precision at mAP@0.5

    # Compute F1 score (harmonic mean of precision and recall)
    f1 = 2 * p * r / (p + r + 1e-16)

    if plot:
        plot_pr_curve(px, py, ap, save_dir, names)

    return p, r, ap, f1, unique_classes.astype('int32')


def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Source: https://github.com/rbgirshick/py-faster-rcnn.
    # Arguments
        recall: The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """

    # Append sentinel values to beginning and end
    mrec = recall  # np.concatenate(([0.], recall, [recall[-1] + 1E-3]))
    mpre = precision  # np.concatenate(([0.], precision, [0.]))

    # Compute the precision envelope
    mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))

    # Integrate area under curve
    method = 'interp'  # methods: 'continuous', 'interp'
    if method == 'interp':
        x = np.linspace(0, 1, 101)  # 101-point interp (COCO)
        ap = np.trapz(np.interp(x, mrec, mpre), x)  # integrate
    else:  # 'continuous'
        i = np.where(mrec[1:] != mrec[:-1])[0]  # points where x axis (recall) changes
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])  # area under curve
    return ap, mpre, mrec
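
As a toy sketch, a detector whose precision stays at 1.0 up to full recall should score AP = 1 under the 101-point interpolation:

# Sketch: compute_ap on an ideal precision-recall curve.
import numpy as np

recall = np.array([0.0, 0.5, 1.0])
precision = np.array([1.0, 1.0, 1.0])
ap, _, _ = compute_ap(recall, precision)
print(round(float(ap), 3))  # expected: 1.0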
class ConfusionMatrix:
    # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix
    def __init__(self, nc, conf=0.25, iou_thres=0.45):
        self.matrix = np.zeros((nc + 1, nc + 1))
        self.nc = nc  # number of classes
        self.conf = conf
        self.iou_thres = iou_thres

    def process_batch(self, detections, labels):
        """
        Update the confusion matrix with one batch of detections and ground-truth labels.
        Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
        Arguments:
            detections (Array[N, 6]), x1, y1, x2, y2, conf, class
            labels (Array[M, 5]), class, x1, y1, x2, y2
        Returns:
            None, updates confusion matrix accordingly
        """
        detections = detections[detections[:, 4] > self.conf]
        gt_classes = labels[:, 0].int()
        detection_classes = detections[:, 5].int()
        iou = general.box_iou(labels[:, 1:], detections[:, :4])

        x = torch.where(iou > self.iou_thres)
        if x[0].shape[0]:
            matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
            if x[0].shape[0] > 1:
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
                matches = matches[matches[:, 2].argsort()[::-1]]
                matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
        else:
            matches = np.zeros((0, 3))

        n = matches.shape[0] > 0
        m0, m1, _ = matches.transpose().astype(np.int16)
        for i, gc in enumerate(gt_classes):
            j = m0 == i
            if n and sum(j) == 1:
                self.matrix[gc, detection_classes[m1[j]]] += 1  # correct
            else:
                self.matrix[gc, self.nc] += 1  # background FP

        if n:
            for i, dc in enumerate(detection_classes):
                if not any(m1 == i):
                    self.matrix[self.nc, dc] += 1  # background FN

    def get_matrix(self):
        # Return the raw matrix (renamed: a method named 'matrix' would be shadowed
        # by the self.matrix array attribute set in __init__)
        return self.matrix

    def plot(self, save_dir='', names=()):
        try:
            import seaborn as sn

            array = self.matrix / (self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6)  # normalize
            array[array < 0.005] = np.nan  # don't annotate (would appear as 0.00)

            fig = plt.figure(figsize=(12, 9), tight_layout=True)
            sn.set(font_scale=1.0 if self.nc < 50 else 0.8)  # for label size
            labels = (0 < len(names) < 99) and len(names) == self.nc  # apply names to ticklabels
            sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True,
                       xticklabels=names + ['background FN'] if labels else "auto",
                       yticklabels=names + ['background FP'] if labels else "auto").set_facecolor((1, 1, 1))
            fig.axes[0].set_xlabel('True')
            fig.axes[0].set_ylabel('Predicted')
            fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250)
        except Exception:
            pass  # plotting is best-effort; seaborn may be missing

    def print(self):
        for i in range(self.nc + 1):
            print(' '.join(map(str, self.matrix[i])))


# Plots ----------------------------------------------------------------------------------------------------------------

def plot_pr_curve(px, py, ap, save_dir='.', names=()):
    fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
    py = np.stack(py, axis=1)

    if 0 < len(names) < 21:  # show per-class mAP in legend if < 21 classes
        for i, y in enumerate(py.T):
            ax.plot(px, y, linewidth=1, label=f'{names[i]} %.3f' % ap[i, 0])  # plot(recall, precision)
    else:
        ax.plot(px, py, linewidth=1, color='grey')  # plot(recall, precision)

    ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean())
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
    fig.savefig(Path(save_dir) / 'precision_recall_curve.png', dpi=250)
@ -0,0 +1,242 @@
# PyTorch utils

import logging
import math
import os
import time
from contextlib import contextmanager
from copy import deepcopy

import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
import torchvision

logger = logging.getLogger(__name__)


@contextmanager
def torch_distributed_zero_first(local_rank: int):
    """
    Decorator to make all processes in distributed training wait for each local_master to do something.
    """
    if local_rank not in [-1, 0]:
        torch.distributed.barrier()
    yield
    if local_rank == 0:
        torch.distributed.barrier()


def init_torch_seeds(seed=0):
    # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html
    torch.manual_seed(seed)
    if seed == 0:  # slower, more reproducible
        cudnn.deterministic = True
        cudnn.benchmark = False
    else:  # faster, less reproducible
        cudnn.deterministic = False
        cudnn.benchmark = True


def select_device(device='', batch_size=None):
    # device = 'cpu' or '0' or '0,1,2,3'
    cpu_request = device.lower() == 'cpu'
    if device and not cpu_request:  # if device requested other than 'cpu'
        os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable
        assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device  # check availability

    cuda = False if cpu_request else torch.cuda.is_available()
    if cuda:
        c = 1024 ** 2  # bytes to MB
        ng = torch.cuda.device_count()
        if ng > 1 and batch_size:  # check that batch_size is compatible with device_count
            assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng)
        x = [torch.cuda.get_device_properties(i) for i in range(ng)]
        s = f'Using torch {torch.__version__} '
        for i in range(0, ng):
            if i == 1:
                s = ' ' * len(s)
            logger.info("%sCUDA:%g (%s, %dMB)" % (s, i, x[i].name, x[i].total_memory / c))
    else:
        logger.info(f'Using torch {torch.__version__} CPU')

    logger.info('')  # skip a line
    return torch.device('cuda:0' if cuda else 'cpu')


def time_synchronized():
    # Accurate timing: wait for all CUDA kernels to finish before reading the clock
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


def is_parallel(model):
    return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)


def intersect_dicts(da, db, exclude=()):
    # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
    return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape}


def initialize_weights(model):
    for m in model.modules():
        t = type(m)
        if t is nn.Conv2d:
            pass  # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif t is nn.BatchNorm2d:
            m.eps = 1e-3
            m.momentum = 0.03
        elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
            m.inplace = True


def find_modules(model, mclass=nn.Conv2d):
    # Finds layer indices matching module class 'mclass'
    return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]


def sparsity(model):
    # Return global model sparsity
    a, b = 0., 0.
    for p in model.parameters():
        a += p.numel()
        b += (p == 0).sum()
    return b / a


def prune(model, amount=0.3):
    # Prune model to requested global sparsity
    import torch.nn.utils.prune as prune
    print('Pruning model... ', end='')
    for name, m in model.named_modules():
        if isinstance(m, nn.Conv2d):
            prune.l1_unstructured(m, name='weight', amount=amount)  # prune
            prune.remove(m, 'weight')  # make permanent
    print(' %.3g global sparsity' % sparsity(model))


def fuse_conv_and_bn(conv, bn):
    # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
    fusedconv = nn.Conv2d(conv.in_channels,
                          conv.out_channels,
                          kernel_size=conv.kernel_size,
                          stride=conv.stride,
                          padding=conv.padding,
                          groups=conv.groups,
                          bias=True).requires_grad_(False).to(conv.weight.device)

    # prepare filters
    w_conv = conv.weight.clone().view(conv.out_channels, -1)
    w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
    fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))

    # prepare spatial bias
    b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
    b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
    fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)

    return fusedconv
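
Fusing folds the BN affine transform into the conv weights, so the fused layer must reproduce conv+BN in eval mode. A quick verification sketch (run in this module's namespace):

# Sketch: fused conv must match conv+BN (eval mode, running stats) on random input.
import torch
import torch.nn as nn

conv = nn.Conv2d(3, 8, 3, padding=1, bias=False)
bn = nn.BatchNorm2d(8).eval()       # eval mode uses running stats, as fusion assumes
bn.running_mean.uniform_(-1, 1)     # give the running stats non-trivial values
bn.running_var.uniform_(0.5, 1.5)

fused = fuse_conv_and_bn(conv, bn)
x = torch.randn(2, 3, 16, 16)
print(torch.allclose(bn(conv(x)), fused(x), atol=1e-5))  # expected: True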
def model_info(model, verbose=False, img_size=640):
    # Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320]
    n_p = sum(x.numel() for x in model.parameters())  # number parameters
    n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
    if verbose:
        print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
        for i, (name, p) in enumerate(model.named_parameters()):
            name = name.replace('module_list.', '')
            print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
                  (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))

    try:  # FLOPS
        from thop import profile
        stride = int(model.stride.max())
        img = torch.zeros((1, 3, stride, stride), device=next(model.parameters()).device)  # input
        flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2  # stride FLOPS
        img_size = img_size if isinstance(img_size, list) else [img_size, img_size]  # expand if int/float
        fs = ', %.1f GFLOPS' % (flops * img_size[0] / stride * img_size[1] / stride)  # 640x640 FLOPS
    except Exception:  # thop missing or profiling failed
        fs = ''

    logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}")


def load_classifier(name='resnet101', n=2):
    # Loads a pretrained model reshaped to n-class output
    model = torchvision.models.__dict__[name](pretrained=True)

    # ResNet model properties
    # input_size = [3, 224, 224]
    # input_space = 'RGB'
    # input_range = [0, 1]
    # mean = [0.485, 0.456, 0.406]
    # std = [0.229, 0.224, 0.225]

    # Reshape output to n classes
    filters = model.fc.weight.shape[1]
    model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True)
    model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True)
    model.fc.out_features = n
    return model


def scale_img(img, ratio=1.0, same_shape=False):  # img(16,3,256,416), r=ratio
    # scales img(bs,3,y,x) by ratio
    if ratio == 1.0:
        return img
    else:
        h, w = img.shape[2:]
        s = (int(h * ratio), int(w * ratio))  # new size
        img = F.interpolate(img, size=s, mode='bilinear', align_corners=False)  # resize
        if not same_shape:  # pad/crop img
            gs = 32  # (pixels) grid size
            h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)]
        return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447)  # value = imagenet mean


def copy_attr(a, b, include=(), exclude=()):
    # Copy attributes from b to a, options to only include [...] and to exclude [...]
    for k, v in b.__dict__.items():
        if (len(include) and k not in include) or k.startswith('_') or k in exclude:
            continue
        else:
            setattr(a, k, v)


class ModelEMA:
    """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
    Keep a moving average of everything in the model state_dict (parameters and buffers).
    This is intended to allow functionality like
    https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
    A smoothed version of the weights is necessary for some training schemes to perform well.
    This class is sensitive to where it is initialized in the sequence of model init,
    GPU assignment and distributed training wrappers.
    """

    def __init__(self, model, decay=0.9999, updates=0):
        # Create EMA
        self.ema = deepcopy(model.module if is_parallel(model) else model).eval()  # FP32 EMA
        # if next(model.parameters()).device.type != 'cpu':
        #     self.ema.half()  # FP16 EMA
        self.updates = updates  # number of EMA updates
        self.decay = lambda x: decay * (1 - math.exp(-x / 2000))  # decay exponential ramp (to help early epochs)
        for p in self.ema.parameters():
            p.requires_grad_(False)

    def update(self, model):
        # Update EMA parameters
        with torch.no_grad():
            self.updates += 1
            d = self.decay(self.updates)

            msd = model.module.state_dict() if is_parallel(model) else model.state_dict()  # model state_dict
            for k, v in self.ema.state_dict().items():
                if v.dtype.is_floating_point:
                    v *= d
                    v += (1. - d) * msd[k].detach()

    def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
        # Update EMA attributes
        copy_attr(self.ema, model, include, exclude)
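
A minimal training-loop sketch for ModelEMA (toy model and optimizer, not repo code):

# Sketch: maintain an EMA copy alongside normal optimizer steps.
import torch
import torch.nn as nn

model = nn.Linear(4, 2)
ema = ModelEMA(model)
opt = torch.optim.SGD(model.parameters(), lr=0.1)

for _ in range(3):
    loss = model(torch.randn(8, 4)).pow(2).mean()
    opt.zero_grad()
    loss.backward()
    opt.step()
    ema.update(model)  # EMA tracks the freshly updated weights

# evaluate/checkpoint with ema.ema (the smoothed model) rather than model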
Binary file not shown.