Родитель
1c1501738e
Коммит
434526bd11
Двоичный файл не отображается.
|
@ -0,0 +1,7 @@
|
|||
train = LINEMOD/ape/train.txt
|
||||
valid = LINEMOD/ape/test.txt
|
||||
backup = backup/ape
|
||||
mesh = LINEMOD/ape/ape.ply
|
||||
tr_range = LINEMOD/ape/training_range.txt
|
||||
name = ape
|
||||
diam = 0.103
|
|
@ -0,0 +1,7 @@
|
|||
train = LINEMOD/benchvise/train.txt
|
||||
valid = LINEMOD/benchvise/test.txt
|
||||
backup = backup/benchvise
|
||||
mesh = LINEMOD/benchvise/benchvise.ply
|
||||
tr_range = LINEMOD/benchvise/training_range.txt
|
||||
name = benchvise
|
||||
diam = 0.286908
|
|
@ -0,0 +1,7 @@
|
|||
train = LINEMOD/cam/train.txt
|
||||
valid = LINEMOD/cam/test.txt
|
||||
backup = backup/cam
|
||||
mesh = LINEMOD/cam/cam.ply
|
||||
tr_range = LINEMOD/cam/training_range.txt
|
||||
name = cam
|
||||
diam = 0.173
|
|
@ -0,0 +1,7 @@
|
|||
train = LINEMOD/can/train.txt
|
||||
valid = LINEMOD/can/test.txt
|
||||
backup = backup/can
|
||||
mesh = LINEMOD/can/can.ply
|
||||
tr_range = LINEMOD/can/training_range.txt
|
||||
name = can
|
||||
diam = 0.202
|
|
@ -0,0 +1,7 @@
|
|||
train = LINEMOD/cat/train.txt
|
||||
valid = LINEMOD/cat/test.txt
|
||||
backup = backup/cat
|
||||
mesh = LINEMOD/cat/cat.ply
|
||||
tr_range = LINEMOD/cat/training_range.txt
|
||||
name = cat
|
||||
diam = 0.155
|
|
@ -0,0 +1,7 @@
|
|||
train = LINEMOD/driller/train.txt
|
||||
valid = LINEMOD/driller/test.txt
|
||||
backup = backup/driller
|
||||
mesh = LINEMOD/driller/driller.ply
|
||||
tr_range = LINEMOD/driller/training_range.txt
|
||||
name = driller
|
||||
diam = 0.262
|
|
@ -0,0 +1,7 @@
|
|||
train = LINEMOD/duck/train.txt
|
||||
valid = LINEMOD/duck/test.txt
|
||||
backup = backup/duck
|
||||
mesh = LINEMOD/duck/duck.ply
|
||||
tr_range = LINEMOD/duck/training_range.txt
|
||||
name = duck
|
||||
diam = 0.109
|
|
@ -0,0 +1,7 @@
|
|||
train = LINEMOD/eggbox/train.txt
|
||||
valid = LINEMOD/eggbox/test.txt
|
||||
backup = backup/eggbox
|
||||
mesh = LINEMOD/eggbox/eggbox.ply
|
||||
tr_range = LINEMOD/eggbox/training_range.txt
|
||||
name = eggbox
|
||||
diam = 0.176364
|
|
@ -0,0 +1,7 @@
|
|||
train = LINEMOD/glue/train.txt
|
||||
valid = LINEMOD/glue/test.txt
|
||||
backup = backup/glue
|
||||
mesh = LINEMOD/glue/glue.ply
|
||||
tr_range = LINEMOD/glue/training_range.txt
|
||||
name = glue
|
||||
diam = 0.176
|
|
@ -0,0 +1,7 @@
|
|||
train = LINEMOD/holepuncher/train.txt
|
||||
valid = LINEMOD/holepuncher/test.txt
|
||||
backup = backup/holepuncher
|
||||
mesh = LINEMOD/holepuncher/holepuncher.ply
|
||||
tr_range = LINEMOD/holepuncher/training_range.txt
|
||||
name = holepuncher
|
||||
diam = 0.162
|
|
@ -0,0 +1,7 @@
|
|||
train = LINEMOD/iron/train.txt
|
||||
valid = LINEMOD/iron/test.txt
|
||||
backup = backup/iron
|
||||
mesh = LINEMOD/iron/iron.ply
|
||||
tr_range = LINEMOD/iron/training_range.txt
|
||||
name = iron
|
||||
diam = 0.303153
|
|
@ -0,0 +1,7 @@
|
|||
train = LINEMOD/lamp/train.txt
|
||||
valid = LINEMOD/lamp/test.txt
|
||||
backup = backup/lamp
|
||||
mesh = LINEMOD/lamp/lamp.ply
|
||||
tr_range = LINEMOD/lamp/training_range.txt
|
||||
name = lamp
|
||||
diam = 0.285155
|
|
@ -0,0 +1,7 @@
|
|||
train = LINEMOD/phone/train.txt
|
||||
valid = LINEMOD/phone/test.txt
|
||||
backup = backup/phone
|
||||
mesh = LINEMOD/phone/phone.ply
|
||||
tr_range = LINEMOD/phone/training_range.txt
|
||||
name = phone
|
||||
diam = 0.213
|
|
@ -0,0 +1,262 @@
|
|||
[net]
|
||||
# Testing
|
||||
batch=32
|
||||
subdivisions=8
|
||||
# Training
|
||||
# batch=64
|
||||
# subdivisions=8
|
||||
height=416
|
||||
width=416
|
||||
channels=3
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
angle=0
|
||||
saturation = 1.5
|
||||
exposure = 1.5
|
||||
hue=.1
|
||||
|
||||
learning_rate=0.001
|
||||
burn_in=1000
|
||||
max_batches = 80200
|
||||
policy=steps
|
||||
# steps=-1,500,20000,30000
|
||||
# steps=-1,180,360,540
|
||||
steps=-1,50,1000,2000
|
||||
scales=0.1,10,.1,.1
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=32
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
|
||||
#######
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[route]
|
||||
layers=-9
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
filters=64
|
||||
activation=leaky
|
||||
|
||||
[reorg]
|
||||
stride=2
|
||||
|
||||
[route]
|
||||
layers=-1,-4
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
# filters=125
|
||||
filters=32
|
||||
activation=linear
|
||||
|
||||
|
||||
[region]
|
||||
# anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
|
||||
anchors = 0.1067, 0.9223
|
||||
bias_match=1
|
||||
classes=13
|
||||
coords=18
|
||||
num=1
|
||||
softmax=1
|
||||
jitter=.3
|
||||
rescore=1
|
||||
|
||||
object_scale=0
|
||||
noobject_scale=0
|
||||
class_scale=1
|
||||
coord_scale=1
|
||||
|
||||
absolute=1
|
||||
thresh = .6
|
||||
random=1
|
|
@ -0,0 +1,261 @@
|
|||
[net]
|
||||
# Testing
|
||||
batch=32
|
||||
subdivisions=8
|
||||
# Training
|
||||
# batch=64
|
||||
# subdivisions=8
|
||||
height=416
|
||||
width=416
|
||||
channels=3
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
angle=0
|
||||
saturation = 1.5
|
||||
exposure = 1.5
|
||||
hue=.1
|
||||
|
||||
learning_rate=0.001
|
||||
burn_in=1000
|
||||
max_batches = 80200
|
||||
policy=steps
|
||||
# steps=-1,500,20000,30000
|
||||
steps=-1,50,3000,6000
|
||||
scales=0.1,10,.1,.1
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=32
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
|
||||
#######
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[route]
|
||||
layers=-9
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
filters=64
|
||||
activation=leaky
|
||||
|
||||
[reorg]
|
||||
stride=2
|
||||
|
||||
[route]
|
||||
layers=-1,-4
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
# filters=125
|
||||
filters=20
|
||||
activation=linear
|
||||
|
||||
|
||||
[region]
|
||||
# anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
|
||||
anchors = 0.1067, 0.9223
|
||||
bias_match=1
|
||||
classes=1
|
||||
coords=18
|
||||
num=1
|
||||
softmax=1
|
||||
jitter=.3
|
||||
rescore=1
|
||||
|
||||
object_scale=5
|
||||
noobject_scale=0.1
|
||||
class_scale=1
|
||||
coord_scale=1
|
||||
|
||||
absolute=1
|
||||
thresh = .6
|
||||
random=1
|
Двоичный файл не отображается.
|
@ -0,0 +1,5 @@
|
|||
valid = ../LINEMOD/ape/test_occlusion.txt
|
||||
mesh = ../LINEMOD/ape/ape.ply
|
||||
backup = backup_multi
|
||||
name = ape
|
||||
diam = 0.103
|
|
@ -0,0 +1,23 @@
|
|||
train = cfg/train_occlusion.txt
|
||||
valid1 = ../LINEMOD/ape/test_occlusion.txt
|
||||
valid4 = ../LINEMOD/can/test_occlusion.txt
|
||||
valid5 = ../LINEMOD/cat/test_occlusion.txt
|
||||
valid6 = ../LINEMOD/driller/test_occlusion.txt
|
||||
valid7 = ../LINEMOD/duck/test_occlusion.txt
|
||||
valid9 = ../LINEMOD/glue/test_occlusion.txt
|
||||
valid10 = ../LINEMOD/holepuncher/test_occlusion.txt
|
||||
backup = backup_multi
|
||||
mesh1 = ../LINEMOD/ape/ape.ply
|
||||
mesh4 = ../LINEMOD/can/can.ply
|
||||
mesh5 = ../LINEMOD/cat/cat.ply
|
||||
mesh6 = ../LINEMOD/driller/driller.ply
|
||||
mesh7 = ../LINEMOD/duck/duck.ply
|
||||
mesh9 = ../LINEMOD/glue/glue.ply
|
||||
mesh10 = ../LINEMOD/holepuncher/holepuncher.ply
|
||||
diam1 = 0.103
|
||||
diam4 = 0.202
|
||||
diam5 = 0.155
|
||||
diam6 = 0.262
|
||||
diam7 = 0.109
|
||||
diam9 = 0.176
|
||||
diam10 = 0.162
|
|
@ -0,0 +1,183 @@
|
|||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000024.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000030.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000045.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000053.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000063.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000065.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000071.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000072.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000076.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000078.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000091.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000092.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000095.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000099.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000103.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000106.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000116.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000123.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000130.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000134.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000139.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000146.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000152.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000153.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000155.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000157.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000158.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000161.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000163.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000167.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000172.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000174.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000183.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000200.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000214.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000221.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000226.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000235.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000239.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000243.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000271.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000274.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000277.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000286.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000291.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000294.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000302.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000307.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000314.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000320.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000324.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000347.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000350.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000355.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000364.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000367.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000369.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000376.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000377.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000379.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000383.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000384.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000387.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000394.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000402.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000406.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000410.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000413.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000422.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000425.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000430.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000434.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000441.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000446.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000451.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000456.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000461.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000465.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000471.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000480.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000483.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000493.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000496.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000498.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000507.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000512.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000525.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000527.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000532.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000533.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000534.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000539.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000554.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000556.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000568.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000571.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000573.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000576.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000598.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000603.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000604.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000609.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000627.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000635.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000641.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000649.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000653.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000656.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000659.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000668.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000676.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000692.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000697.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000706.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000715.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000717.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000726.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000735.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000744.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000747.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000752.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000758.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000760.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000772.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000775.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000780.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000785.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000800.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000802.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000828.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000837.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000842.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000845.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000847.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000850.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000859.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000875.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000880.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000883.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000891.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000892.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000915.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000916.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000923.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000931.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000933.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000941.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000945.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000954.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000959.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000964.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000975.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000987.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001002.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001014.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001020.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001024.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001038.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001040.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001048.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001066.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001071.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001081.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001084.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001088.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001102.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001103.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001106.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001112.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001121.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001129.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001133.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001135.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001136.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001157.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001159.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001163.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001171.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001172.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001174.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001191.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001198.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001205.jpg
|
|
@ -0,0 +1,261 @@
|
|||
[net]
|
||||
# Testing
|
||||
batch=32
|
||||
subdivisions=8
|
||||
# Training
|
||||
# batch=64
|
||||
# subdivisions=8
|
||||
height=416
|
||||
width=416
|
||||
channels=3
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
angle=0
|
||||
saturation = 1.5
|
||||
exposure = 1.5
|
||||
hue=.1
|
||||
|
||||
learning_rate=0.001
|
||||
burn_in=1000
|
||||
max_batches = 80200
|
||||
policy=steps
|
||||
steps=-1,100,20000,30000
|
||||
# steps=-1,180,360,540
|
||||
scales=0.1,10,.1,.1
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=32
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
|
||||
#######
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[route]
|
||||
layers=-9
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
filters=64
|
||||
activation=leaky
|
||||
|
||||
[reorg]
|
||||
stride=2
|
||||
|
||||
[route]
|
||||
layers=-1,-4
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
# filters=125
|
||||
filters=160
|
||||
activation=linear
|
||||
|
||||
|
||||
[region]
|
||||
# anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
|
||||
anchors = 1.4820, 2.2412, 2.0501, 3.1265, 2.3946, 4.6891, 3.1018, 3.9910, 3.4879, 5.8851
|
||||
bias_match=1
|
||||
classes=13
|
||||
coords=18
|
||||
num=5
|
||||
softmax=1
|
||||
jitter=.3
|
||||
rescore=1
|
||||
|
||||
object_scale=5
|
||||
noobject_scale=0.1
|
||||
class_scale=1
|
||||
coord_scale=1
|
||||
|
||||
absolute=1
|
||||
thresh = .6
|
||||
random=1
|
|
@ -0,0 +1,5 @@
|
|||
valid = ../LINEMOD/ape/test_occlusion.txt
|
||||
mesh = ../LINEMOD/ape/ape.ply
|
||||
backup = backup_multi
|
||||
name = ape
|
||||
diam = 0.103
|
|
@ -0,0 +1,7 @@
|
|||
train = ../LINEMOD/benchvise/train.txt
|
||||
valid = ../LINEMOD/benchvise/test.txt
|
||||
backup = backup_multi
|
||||
mesh = ../LINEMOD/benchvise/benchvise.ply
|
||||
tr_range = ../LINEMOD/benchvise/training_range.txt
|
||||
name = benchvise
|
||||
diam = 0.286908
|
|
@ -0,0 +1,5 @@
|
|||
valid = ../LINEMOD/can/test_occlusion.txt
|
||||
mesh = ../LINEMOD/can/can.ply
|
||||
backup = backup_multi
|
||||
name = can
|
||||
diam = 0.202
|
|
@ -0,0 +1,5 @@
|
|||
valid = ../LINEMOD/cat/test_occlusion.txt
|
||||
mesh = ../LINEMOD/cat/cat.ply
|
||||
backup = backup_multi
|
||||
name = cat
|
||||
diam = 0.155
|
|
@ -0,0 +1,5 @@
|
|||
valid = ../LINEMOD/driller/test_occlusion.txt
|
||||
mesh = ../LINEMOD/driller/driller.ply
|
||||
backup = backup_multi
|
||||
name = driller
|
||||
diam = 0.262
|
|
@ -0,0 +1,5 @@
|
|||
valid = ../LINEMOD/duck/test_occlusion.txt
|
||||
mesh = ../LINEMOD/duck/duck.ply
|
||||
backup = backup_multi
|
||||
name = duck
|
||||
diam = 0.109
|
|
@ -0,0 +1,5 @@
|
|||
valid = ../LINEMOD/eggbox/test_occlusion.txt
|
||||
mesh = ../LINEMOD/eggbox/eggbox.ply
|
||||
backup = backup_multi
|
||||
name = eggbox
|
||||
diam = 0.176364
|
|
@ -0,0 +1,5 @@
|
|||
valid = ../LINEMOD/glue/test_occlusion.txt
|
||||
mesh = ../LINEMOD/glue/glue.ply
|
||||
backup = backup_multi
|
||||
name = glue
|
||||
diam = 0.176
|
|
@ -0,0 +1,5 @@
|
|||
valid = ../LINEMOD/holepuncher/test_occlusion.txt
|
||||
mesh = ../LINEMOD/holepuncher/holepuncher.ply
|
||||
backup = backup_multi
|
||||
name = holepuncher
|
||||
diam = 0.162
|
|
@ -0,0 +1,23 @@
|
|||
train = cfg/train_occlusion.txt
|
||||
valid1 = ../LINEMOD/ape/test_occlusion.txt
|
||||
valid4 = ../LINEMOD/can/test_occlusion.txt
|
||||
valid5 = ../LINEMOD/cat/test_occlusion.txt
|
||||
valid6 = ../LINEMOD/driller/test_occlusion.txt
|
||||
valid7 = ../LINEMOD/duck/test_occlusion.txt
|
||||
valid9 = ../LINEMOD/glue/test_occlusion.txt
|
||||
valid10 = ../LINEMOD/holepuncher/test_occlusion.txt
|
||||
backup = backup_multi
|
||||
mesh1 = ../LINEMOD/ape/ape.ply
|
||||
mesh4 = ../LINEMOD/can/can.ply
|
||||
mesh5 = ../LINEMOD/cat/cat.ply
|
||||
mesh6 = ../LINEMOD/driller/driller.ply
|
||||
mesh7 = ../LINEMOD/duck/duck.ply
|
||||
mesh9 = ../LINEMOD/glue/glue.ply
|
||||
mesh10 = ../LINEMOD/holepuncher/holepuncher.ply
|
||||
diam1 = 0.103
|
||||
diam4 = 0.202
|
||||
diam5 = 0.155
|
||||
diam6 = 0.262
|
||||
diam7 = 0.109
|
||||
diam9 = 0.176
|
||||
diam10 = 0.162
|
|
@ -0,0 +1,183 @@
|
|||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000024.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000030.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000045.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000053.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000063.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000065.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000071.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000072.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000076.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000078.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000091.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000092.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000095.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000099.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000103.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000106.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000116.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000123.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000130.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000134.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000139.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000146.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000152.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000153.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000155.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000157.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000158.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000161.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000163.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000167.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000172.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000174.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000183.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000200.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000214.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000221.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000226.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000235.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000239.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000243.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000271.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000274.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000277.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000286.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000291.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000294.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000302.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000307.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000314.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000320.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000324.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000347.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000350.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000355.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000364.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000367.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000369.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000376.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000377.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000379.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000383.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000384.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000387.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000394.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000402.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000406.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000410.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000413.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000422.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000425.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000430.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000434.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000441.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000446.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000451.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000456.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000461.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000465.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000471.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000480.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000483.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000493.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000496.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000498.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000507.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000512.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000525.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000527.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000532.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000533.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000534.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000539.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000554.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000556.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000568.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000571.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000573.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000576.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000598.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000603.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000604.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000609.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000627.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000635.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000641.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000649.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000653.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000656.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000659.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000668.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000676.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000692.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000697.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000706.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000715.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000717.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000726.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000735.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000744.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000747.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000752.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000758.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000760.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000772.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000775.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000780.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000785.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000800.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000802.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000828.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000837.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000842.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000845.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000847.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000850.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000859.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000875.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000880.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000883.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000891.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000892.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000915.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000916.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000923.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000931.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000933.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000941.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000945.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000954.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000959.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000964.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000975.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000987.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001002.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001014.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001020.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001024.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001038.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001040.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001048.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001066.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001071.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001081.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001084.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001088.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001102.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001103.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001106.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001112.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001121.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001129.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001133.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001135.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001136.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001157.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001159.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001163.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001171.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001172.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001174.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001191.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001198.jpg
|
||||
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001205.jpg
|
|
@ -0,0 +1,261 @@
|
|||
[net]
|
||||
# Testing
|
||||
batch=64
|
||||
subdivisions=8
|
||||
# Training
|
||||
# batch=64
|
||||
# subdivisions=8
|
||||
height=416
|
||||
width=416
|
||||
channels=3
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
angle=0
|
||||
saturation = 1.5
|
||||
exposure = 1.5
|
||||
hue=.1
|
||||
|
||||
learning_rate=0.001
|
||||
burn_in=1000
|
||||
max_batches = 80200
|
||||
policy=steps
|
||||
steps=-1,500,20000,30000
|
||||
# steps=-1,180,360,540
|
||||
scales=0.1,10,.1,.1
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=32
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
|
||||
#######
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[route]
|
||||
layers=-9
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
filters=64
|
||||
activation=leaky
|
||||
|
||||
[reorg]
|
||||
stride=2
|
||||
|
||||
[route]
|
||||
layers=-1,-4
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
# filters=125
|
||||
filters=160
|
||||
activation=linear
|
||||
|
||||
|
||||
[region]
|
||||
# anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
|
||||
anchors = 1.4820, 2.2412, 2.0501, 3.1265, 2.3946, 4.6891, 3.1018, 3.9910, 3.4879, 5.8851
|
||||
bias_match=1
|
||||
classes=13
|
||||
coords=18
|
||||
num=5
|
||||
softmax=1
|
||||
jitter=.3
|
||||
rescore=1
|
||||
|
||||
object_scale=0
|
||||
noobject_scale=0
|
||||
class_scale=1
|
||||
coord_scale=1
|
||||
|
||||
absolute=1
|
||||
thresh = .6
|
||||
random=1
|
|
@ -0,0 +1,261 @@
|
|||
[net]
|
||||
# Testing
|
||||
batch=32
|
||||
subdivisions=8
|
||||
# Training
|
||||
# batch=64
|
||||
# subdivisions=8
|
||||
height=416
|
||||
width=416
|
||||
channels=3
|
||||
momentum=0.9
|
||||
decay=0.0005
|
||||
angle=0
|
||||
saturation = 1.5
|
||||
exposure = 1.5
|
||||
hue=.1
|
||||
|
||||
learning_rate=0.001
|
||||
burn_in=1000
|
||||
max_batches = 80200
|
||||
policy=steps
|
||||
steps=-1,100,20000,30000
|
||||
# steps=-1,180,360,540
|
||||
scales=0.1,10,.1,.1
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=32
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=64
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=128
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=256
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[maxpool]
|
||||
size=2
|
||||
stride=2
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=512
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
filters=1024
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
activation=leaky
|
||||
|
||||
|
||||
#######
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[route]
|
||||
layers=-9
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
filters=64
|
||||
activation=leaky
|
||||
|
||||
[reorg]
|
||||
stride=2
|
||||
|
||||
[route]
|
||||
layers=-1,-4
|
||||
|
||||
[convolutional]
|
||||
batch_normalize=1
|
||||
size=3
|
||||
stride=1
|
||||
pad=1
|
||||
filters=1024
|
||||
activation=leaky
|
||||
|
||||
[convolutional]
|
||||
size=1
|
||||
stride=1
|
||||
pad=1
|
||||
# filters=125
|
||||
filters=160
|
||||
activation=linear
|
||||
|
||||
|
||||
[region]
|
||||
# anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
|
||||
anchors = 1.4820, 2.2412, 2.0501, 3.1265, 2.3946, 4.6891, 3.1018, 3.9910, 3.4879, 5.8851
|
||||
bias_match=1
|
||||
classes=13
|
||||
coords=18
|
||||
num=5
|
||||
softmax=1
|
||||
jitter=.3
|
||||
rescore=1
|
||||
|
||||
object_scale=5
|
||||
noobject_scale=0.1
|
||||
class_scale=1
|
||||
coord_scale=1
|
||||
|
||||
absolute=1
|
||||
thresh = .6
|
||||
random=1
|
|
@ -0,0 +1,388 @@
|
|||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import numpy as np
|
||||
from region_loss_multi import RegionLoss
|
||||
from cfg import *
|
||||
|
||||
class MaxPoolStride1(nn.Module):
|
||||
def __init__(self):
|
||||
super(MaxPoolStride1, self).__init__()
|
||||
|
||||
def forward(self, x):
|
||||
x = F.max_pool2d(F.pad(x, (0,1,0,1), mode='replicate'), 2, stride=1)
|
||||
return x
|
||||
|
||||
class Reorg(nn.Module):
|
||||
def __init__(self, stride=2):
|
||||
super(Reorg, self).__init__()
|
||||
self.stride = stride
|
||||
def forward(self, x):
|
||||
stride = self.stride
|
||||
assert(x.data.dim() == 4)
|
||||
B = x.data.size(0)
|
||||
C = x.data.size(1)
|
||||
H = x.data.size(2)
|
||||
W = x.data.size(3)
|
||||
assert(H % stride == 0)
|
||||
assert(W % stride == 0)
|
||||
ws = stride
|
||||
hs = stride
|
||||
x = x.view(B, C, H/hs, hs, W/ws, ws).transpose(3,4).contiguous()
|
||||
x = x.view(B, C, H/hs*W/ws, hs*ws).transpose(2,3).contiguous()
|
||||
x = x.view(B, C, hs*ws, H/hs, W/ws).transpose(1,2).contiguous()
|
||||
x = x.view(B, hs*ws*C, H/hs, W/ws)
|
||||
return x
|
||||
|
||||
class GlobalAvgPool2d(nn.Module):
|
||||
def __init__(self):
|
||||
super(GlobalAvgPool2d, self).__init__()
|
||||
|
||||
def forward(self, x):
|
||||
N = x.data.size(0)
|
||||
C = x.data.size(1)
|
||||
H = x.data.size(2)
|
||||
W = x.data.size(3)
|
||||
x = F.avg_pool2d(x, (H, W))
|
||||
x = x.view(N, C)
|
||||
return x
|
||||
|
||||
# for route and shortcut
|
||||
class EmptyModule(nn.Module):
|
||||
def __init__(self):
|
||||
super(EmptyModule, self).__init__()
|
||||
|
||||
def forward(self, x):
|
||||
return x
|
||||
|
||||
# support route shortcut and reorg
|
||||
class Darknet(nn.Module):
|
||||
def __init__(self, cfgfile):
|
||||
super(Darknet, self).__init__()
|
||||
self.blocks = parse_cfg(cfgfile)
|
||||
self.models = self.create_network(self.blocks) # merge conv, bn,leaky
|
||||
self.loss = self.models[len(self.models)-1]
|
||||
|
||||
self.width = int(self.blocks[0]['width'])
|
||||
self.height = int(self.blocks[0]['height'])
|
||||
|
||||
if self.blocks[(len(self.blocks)-1)]['type'] == 'region':
|
||||
self.anchors = self.loss.anchors
|
||||
self.num_anchors = self.loss.num_anchors
|
||||
self.anchor_step = self.loss.anchor_step
|
||||
self.num_classes = self.loss.num_classes
|
||||
|
||||
self.header = torch.IntTensor([0,0,0,0])
|
||||
self.seen = 0
|
||||
self.iter = 0
|
||||
|
||||
def forward(self, x):
|
||||
ind = -2
|
||||
self.loss = None
|
||||
outputs = dict()
|
||||
for block in self.blocks:
|
||||
ind = ind + 1
|
||||
#if ind > 0:
|
||||
# return x
|
||||
|
||||
if block['type'] == 'net':
|
||||
continue
|
||||
elif block['type'] == 'convolutional' or block['type'] == 'maxpool' or block['type'] == 'reorg' or block['type'] == 'avgpool' or block['type'] == 'softmax' or block['type'] == 'connected':
|
||||
x = self.models[ind](x)
|
||||
outputs[ind] = x
|
||||
elif block['type'] == 'route':
|
||||
layers = block['layers'].split(',')
|
||||
layers = [int(i) if int(i) > 0 else int(i)+ind for i in layers]
|
||||
if len(layers) == 1:
|
||||
x = outputs[layers[0]]
|
||||
outputs[ind] = x
|
||||
elif len(layers) == 2:
|
||||
x1 = outputs[layers[0]]
|
||||
x2 = outputs[layers[1]]
|
||||
x = torch.cat((x1,x2),1)
|
||||
outputs[ind] = x
|
||||
elif block['type'] == 'shortcut':
|
||||
from_layer = int(block['from'])
|
||||
activation = block['activation']
|
||||
from_layer = from_layer if from_layer > 0 else from_layer + ind
|
||||
x1 = outputs[from_layer]
|
||||
x2 = outputs[ind-1]
|
||||
x = x1 + x2
|
||||
if activation == 'leaky':
|
||||
x = F.leaky_relu(x, 0.1, inplace=True)
|
||||
elif activation == 'relu':
|
||||
x = F.relu(x, inplace=True)
|
||||
outputs[ind] = x
|
||||
elif block['type'] == 'region':
|
||||
continue
|
||||
if self.loss:
|
||||
self.loss = self.loss + self.models[ind](x)
|
||||
else:
|
||||
self.loss = self.models[ind](x)
|
||||
outputs[ind] = None
|
||||
elif block['type'] == 'cost':
|
||||
continue
|
||||
else:
|
||||
print('unknown type %s' % (block['type']))
|
||||
return x
|
||||
|
||||
def print_network(self):
|
||||
print_cfg(self.blocks)
|
||||
|
||||
def create_network(self, blocks):
    """Translate parsed darknet cfg blocks into an nn.ModuleList.

    blocks: list of dicts as produced by the cfg parser; the leading 'net'
    block supplies the input channel count. Parameter-free layers
    (route/shortcut) get EmptyModule placeholders so module indices stay
    aligned with block indices. Returns the nn.ModuleList.
    """
    models = nn.ModuleList()

    prev_filters = 3
    out_filters = []  # output channel count of every created layer, by index
    conv_id = 0
    for block in blocks:
        if block['type'] == 'net':
            prev_filters = int(block['channels'])
            continue
        elif block['type'] == 'convolutional':
            conv_id = conv_id + 1
            batch_normalize = int(block['batch_normalize'])
            filters = int(block['filters'])
            kernel_size = int(block['size'])
            stride = int(block['stride'])
            is_pad = int(block['pad'])
            # integer division: a float pad breaks nn.Conv2d on Python 3
            pad = (kernel_size - 1) // 2 if is_pad else 0
            activation = block['activation']
            model = nn.Sequential()
            if batch_normalize:
                # conv bias is redundant when followed by BatchNorm
                model.add_module('conv{0}'.format(conv_id), nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=False))
                model.add_module('bn{0}'.format(conv_id), nn.BatchNorm2d(filters, eps=1e-4))
            else:
                model.add_module('conv{0}'.format(conv_id), nn.Conv2d(prev_filters, filters, kernel_size, stride, pad))
            if activation == 'leaky':
                model.add_module('leaky{0}'.format(conv_id), nn.LeakyReLU(0.1, inplace=True))
            elif activation == 'relu':
                model.add_module('relu{0}'.format(conv_id), nn.ReLU(inplace=True))
            prev_filters = filters
            out_filters.append(prev_filters)
            models.append(model)
        elif block['type'] == 'maxpool':
            pool_size = int(block['size'])
            stride = int(block['stride'])
            if stride > 1:
                model = nn.MaxPool2d(pool_size, stride)
            else:
                # stride-1 pooling needs special padding to keep spatial size
                model = MaxPoolStride1()
            out_filters.append(prev_filters)
            models.append(model)
        elif block['type'] == 'avgpool':
            model = GlobalAvgPool2d()
            out_filters.append(prev_filters)
            models.append(model)
        elif block['type'] == 'softmax':
            model = nn.Softmax()
            out_filters.append(prev_filters)
            models.append(model)
        elif block['type'] == 'cost':
            if block['_type'] == 'sse':
                model = nn.MSELoss(size_average=True)
            elif block['_type'] == 'L1':
                model = nn.L1Loss(size_average=True)
            elif block['_type'] == 'smooth':
                model = nn.SmoothL1Loss(size_average=True)
            out_filters.append(1)
            models.append(model)
        elif block['type'] == 'reorg':
            stride = int(block['stride'])
            # reorg packs stride*stride spatial positions into channels
            prev_filters = stride * stride * prev_filters
            out_filters.append(prev_filters)
            models.append(Reorg(stride))
        elif block['type'] == 'route':
            layers = block['layers'].split(',')
            ind = len(models)
            # negative layer indices are relative to the current position
            layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers]
            if len(layers) == 1:
                prev_filters = out_filters[layers[0]]
            elif len(layers) == 2:
                assert (layers[0] == ind - 1)
                prev_filters = out_filters[layers[0]] + out_filters[layers[1]]
            out_filters.append(prev_filters)
            models.append(EmptyModule())
        elif block['type'] == 'shortcut':
            ind = len(models)
            prev_filters = out_filters[ind - 1]
            out_filters.append(prev_filters)
            models.append(EmptyModule())
        elif block['type'] == 'connected':
            filters = int(block['output'])
            if block['activation'] == 'linear':
                model = nn.Linear(prev_filters, filters)
            elif block['activation'] == 'leaky':
                model = nn.Sequential(
                    nn.Linear(prev_filters, filters),
                    nn.LeakyReLU(0.1, inplace=True))
            elif block['activation'] == 'relu':
                model = nn.Sequential(
                    nn.Linear(prev_filters, filters),
                    nn.ReLU(inplace=True))
            prev_filters = filters
            out_filters.append(prev_filters)
            models.append(model)
        elif block['type'] == 'region':
            loss = RegionLoss()
            anchors = block['anchors'].split(',')
            loss.anchors = [float(i) for i in anchors]
            loss.num_classes = int(block['classes'])
            loss.num_anchors = int(block['num'])
            # integer division keeps anchor_step usable as a list index on Python 3
            loss.anchor_step = len(loss.anchors) // loss.num_anchors
            loss.object_scale = float(block['object_scale'])
            loss.noobject_scale = float(block['noobject_scale'])
            loss.class_scale = float(block['class_scale'])
            loss.coord_scale = float(block['coord_scale'])
            out_filters.append(prev_filters)
            models.append(loss)
        else:
            print('unknown type %s' % (block['type']))

    return models
|
||||
|
||||
def load_weights(self, weightfile):
    """Load darknet-format weights: a 4-int32 header followed by flat float32
    parameters, consumed layer by layer in cfg order."""
    with open(weightfile, 'rb') as fp:
        header = np.fromfile(fp, count=4, dtype=np.int32)
        self.header = torch.from_numpy(header)
        self.seen = self.header[3]
        buf = np.fromfile(fp, dtype=np.float32)

    start = 0
    ind = -2
    for block in self.blocks:
        if start >= buf.size:
            break
        ind = ind + 1
        btype = block['type']
        if btype == 'net':
            continue
        if btype == 'convolutional':
            model = self.models[ind]
            if int(block['batch_normalize']):
                start = load_conv_bn(buf, start, model[0], model[1])
            else:
                start = load_conv(buf, start, model[0])
        elif btype == 'connected':
            model = self.models[ind]
            if block['activation'] == 'linear':
                # a bare nn.Linear
                start = load_fc(buf, start, model)
            else:
                # Sequential(Linear, activation): the Linear is element 0
                start = load_fc(buf, start, model[0])
        elif btype in ('maxpool', 'reorg', 'route', 'shortcut',
                       'region', 'avgpool', 'softmax', 'cost'):
            # parameter-free layers consume no weights
            pass
        else:
            print('unknown type %s' % (block['type']))
|
||||
|
||||
def load_weights_until_last(self, weightfile):
    """Like load_weights, but stop two blocks before the end so the final
    layer keeps its random initialization (useful for fine-tuning)."""
    with open(weightfile, 'rb') as fp:
        header = np.fromfile(fp, count=4, dtype=np.int32)
        self.header = torch.from_numpy(header)
        self.seen = self.header[3]
        buf = np.fromfile(fp, dtype=np.float32)

    start = 0
    ind = -2
    blocklen = len(self.blocks)
    for i in range(blocklen - 2):
        block = self.blocks[i]
        if start >= buf.size:
            break
        ind = ind + 1
        btype = block['type']
        if btype == 'net':
            continue
        if btype == 'convolutional':
            model = self.models[ind]
            if int(block['batch_normalize']):
                start = load_conv_bn(buf, start, model[0], model[1])
            else:
                start = load_conv(buf, start, model[0])
        elif btype == 'connected':
            model = self.models[ind]
            if block['activation'] == 'linear':
                # a bare nn.Linear
                start = load_fc(buf, start, model)
            else:
                # Sequential(Linear, activation): the Linear is element 0
                start = load_fc(buf, start, model[0])
        elif btype in ('maxpool', 'reorg', 'route', 'shortcut',
                       'region', 'avgpool', 'softmax', 'cost'):
            # parameter-free layers consume no weights
            pass
        else:
            print('unknown type %s' % (block['type']))
|
||||
|
||||
|
||||
def save_weights(self, outfile, cutoff=0):
    """Write the network parameters in darknet format (4-int32 header, then
    flat float32 weights), up to and including block `cutoff` (all blocks
    when cutoff <= 0).

    Fixes two defects in the original: `save_fc` was called with the
    undefined name `fc` (NameError on any 'connected' layer), and the
    `model` vs `model[0]` choice was inverted relative to load_weights.
    """
    if cutoff <= 0:
        cutoff = len(self.blocks) - 1

    fp = open(outfile, 'wb')
    self.header[3] = self.seen
    header = self.header
    header.numpy().tofile(fp)

    ind = -1
    for blockId in range(1, cutoff + 1):
        ind = ind + 1
        block = self.blocks[blockId]
        if block['type'] == 'convolutional':
            model = self.models[ind]
            batch_normalize = int(block['batch_normalize'])
            if batch_normalize:
                save_conv_bn(fp, model[0], model[1])
            else:
                save_conv(fp, model[0])
        elif block['type'] == 'connected':
            model = self.models[ind]
            if block['activation'] != 'linear':
                # Sequential(Linear, activation): the Linear is element 0
                save_fc(fp, model[0])
            else:
                # a bare nn.Linear
                save_fc(fp, model)
        elif block['type'] in ('maxpool', 'reorg', 'route', 'shortcut',
                               'region', 'avgpool', 'softmax', 'cost'):
            # parameter-free layers write nothing
            pass
        else:
            print('unknown type %s' % (block['type']))
    fp.close()
|
|
@ -0,0 +1,94 @@
|
|||
#!/usr/bin/python
|
||||
# encoding: utf-8
|
||||
|
||||
import os
|
||||
import random
|
||||
import torch
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from torch.utils.data import Dataset
|
||||
|
||||
from utils import read_truths_args, read_truths, get_all_files
|
||||
from image_multi import *
|
||||
|
||||
class listDataset(Dataset):
    """Dataset over a text file of image paths for multi-object 6D-pose training.

    Training samples are composited with random backgrounds and jittered;
    evaluation samples are loaded as-is with occlusion labels.
    """

    def __init__(self, root, shape=None, shuffle=True, transform=None, objclass=None, target_transform=None, train=False, seen=0, batch_size=64, num_workers=4, bg_file_names=None):
        with open(root, 'r') as listfile:
            self.lines = listfile.readlines()
        if shuffle:
            random.shuffle(self.lines)
        self.nSamples = len(self.lines)
        self.transform = transform
        self.target_transform = target_transform
        self.train = train
        self.shape = shape
        self.seen = seen
        self.batch_size = batch_size
        self.num_workers = num_workers
        # candidate background images for compositing during training
        self.bg_file_names = bg_file_names
        self.objclass = objclass

    def __len__(self):
        return self.nSamples

    def __getitem__(self, index):
        assert index <= len(self), 'index range error'
        imgpath = self.lines[index].rstrip()

        if self.train and index % 64 == 0:
            # Multi-scale training schedule: fixed 416px for the first
            # 4000*64 samples, then a widening random resolution range.
            # bucket b >= 1 reproduces randint(0, 2b+1) + (14-b), i.e. the
            # original (0,3)+13 / (0,5)+12 / (0,7)+11 / (0,9)+10 ladder.
            bucket = min(self.seen // (4000 * 64), 4)
            if bucket == 0:
                width = 13 * 32
            else:
                width = (random.randint(0, 2 * bucket + 1) + 14 - bucket) * 32
            self.shape = (width, width)

        if self.train:
            jitter = 0.1
            hue = 0.05
            saturation = 1.5
            exposure = 1.5

            # pick a random background image for compositing
            bgpath = self.bg_file_names[random.randint(0, len(self.bg_file_names) - 1)]

            img, label = load_data_detection(imgpath, self.shape, jitter, hue, saturation, exposure, bgpath)
            label = torch.from_numpy(label)
        else:
            img = Image.open(imgpath).convert('RGB')
            if self.shape:
                img = img.resize(self.shape)

            labpath = imgpath.replace('benchvise', self.objclass).replace('images', 'labels_occlusion').replace('JPEGImages', 'labels_occlusion').replace('.jpg', '.txt').replace('.png','.txt')
            # up to 50 objects x 21 numbers (class + 9 keypoints + w/h)
            label = torch.zeros(50 * 21)
            if os.path.getsize(labpath):
                ow, oh = img.size
                truths = torch.from_numpy(read_truths_args(labpath, 8.0 / ow)).view(-1)
                tsz = truths.numel()
                if tsz > 50 * 21:
                    label = truths[0:50 * 21]
                elif tsz > 0:
                    label[0:tsz] = truths

        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            label = self.target_transform(label)

        self.seen = self.seen + self.num_workers
        return (img, label)
|
|
@ -0,0 +1,450 @@
|
|||
#!/usr/bin/python
|
||||
# encoding: utf-8
|
||||
import random
|
||||
import os
|
||||
from PIL import Image, ImageChops, ImageMath
|
||||
import numpy as np
|
||||
|
||||
def load_data_detection_backup(imgpath, shape, jitter, hue, saturation, exposure, bgpath):
    """Load one augmented training sample (single object): composite the
    object onto a random background, jitter it, and build the label vector.

    Returns (img, label) where label is the flat 50x21 truth array.
    """
    labpath = imgpath.replace('images', 'labels').replace('JPEGImages', 'labels').replace('.jpg', '.txt').replace('.png','.txt')
    maskpath = imgpath.replace('JPEGImages', 'mask').replace('/00', '/').replace('.jpg', '.png')

    # load image, its foreground mask, and the replacement background
    rgb = Image.open(imgpath).convert('RGB')
    fg_mask = Image.open(maskpath).convert('RGB')
    background = Image.open(bgpath).convert('RGB')

    composited = change_background(rgb, fg_mask, background)
    composited, flip, dx, dy, sx, sy = data_augmentation(composited, shape, jitter, hue, saturation, exposure)
    # label coordinates are transformed with the inverse scale
    label = fill_truth_detection(labpath, composited.width, composited.height, flip, dx, dy, 1. / sx, 1. / sy)
    return composited, label
|
||||
|
||||
def get_add_objs(objname):
    """Return the list of other LINEMOD object names to paste into the scene
    when augmenting images of `objname`.

    Raises ValueError for an unknown object name (the original if/elif
    ladder fell through and raised UnboundLocalError instead).
    """
    add_objs_by_name = {
        'ape': ['can', 'cat', 'duck', 'glue', 'holepuncher', 'iron', 'phone'],  # eggbox excluded
        'benchvise': ['ape', 'can', 'cat', 'driller', 'duck', 'glue', 'holepuncher'],
        'cam': ['ape', 'benchvise', 'can', 'cat', 'driller', 'duck', 'holepuncher'],
        'can': ['ape', 'benchvise', 'cat', 'driller', 'duck', 'eggbox', 'holepuncher'],
        'cat': ['ape', 'can', 'duck', 'glue', 'holepuncher', 'eggbox', 'phone'],
        'driller': ['ape', 'benchvise', 'can', 'cat', 'duck', 'glue', 'holepuncher'],
        'duck': ['ape', 'can', 'cat', 'eggbox', 'glue', 'holepuncher', 'phone'],
        'eggbox': ['ape', 'benchvise', 'cam', 'can', 'cat', 'duck', 'glue', 'holepuncher'],
        'glue': ['ape', 'benchvise', 'cam', 'driller', 'duck', 'eggbox', 'holepuncher'],
        'holepuncher': ['benchvise', 'cam', 'can', 'cat', 'driller', 'duck', 'eggbox'],
        'iron': ['ape', 'benchvise', 'can', 'cat', 'driller', 'duck', 'glue'],
        'lamp': ['ape', 'benchvise', 'can', 'driller', 'eggbox', 'holepuncher', 'iron'],
        'phone': ['ape', 'benchvise', 'cam', 'can', 'driller', 'duck', 'holepuncher'],
    }
    try:
        return add_objs_by_name[objname]
    except KeyError:
        raise ValueError('unknown object name: %s' % objname)
|
||||
|
||||
def mask_background(img, mask):
    """Zero out the background of `img`, keeping only pixels where `mask` is
    set (mask values are assumed near 0 or 255 per channel).

    The original computed an unused `negmask` and unused `ow, oh`; both
    removed.
    """
    imcs = list(img.split())
    maskcs = list(mask.split())
    fics = list(Image.new(img.mode, img.size).split())

    for c in range(len(imcs)):
        # 0/1 foreground mask per channel
        posmask = maskcs[c].point(lambda i: i / 255)
        fics[c] = ImageMath.eval("a * c", a=imcs[c], c=posmask).convert('L')
    out = Image.merge(img.mode, tuple(fics))
    return out
|
||||
|
||||
|
||||
def scale_image_channel(im, c, v):
    """Return a copy of `im` with channel index `c` scaled by factor `v`."""
    channels = list(im.split())
    channels[c] = channels[c].point(lambda px: px * v)
    return Image.merge(im.mode, tuple(channels))
|
||||
|
||||
def distort_image(im, hue, sat, val):
    """Color-jitter `im` in HSV space: scale saturation by `sat`, value by
    `val`, and rotate hue by `hue` (fraction of the 0..255 hue circle).
    Returns an RGB image."""
    hsv = im.convert('HSV')
    h_ch, s_ch, v_ch = hsv.split()

    s_ch = s_ch.point(lambda px: px * sat)
    v_ch = v_ch.point(lambda px: px * val)

    def shift_hue(px):
        # wrap the shifted hue back into the 0..255 range
        px += hue * 255
        if px > 255:
            px -= 255
        if px < 0:
            px += 255
        return px

    h_ch = h_ch.point(shift_hue)

    jittered = Image.merge(hsv.mode, (h_ch, s_ch, v_ch))
    return jittered.convert('RGB')
|
||||
|
||||
def rand_scale(s):
    """Draw a random scale factor in [1/s, s]: uniform in [1, s], then
    inverted with probability one half."""
    factor = random.uniform(1, s)
    coin = random.randint(1, 10000) % 2
    return factor if coin else 1. / factor
|
||||
|
||||
def random_distort_image(im, hue, saturation, exposure):
    """Apply a random HSV jitter: hue shift uniform in [-hue, hue],
    saturation and exposure scaled by rand_scale of their bounds."""
    dhue = random.uniform(-hue, hue)
    dsat = rand_scale(saturation)
    dexp = rand_scale(exposure)
    return distort_image(im, dhue, dsat, dexp)
|
||||
|
||||
def data_augmentation(img, shape, jitter, hue, saturation, exposure):
    """Random crop, resize to `shape`, horizontal flip, and color jitter.

    Returns (img, flip, dx, dy, sx, sy): the transform parameters needed to
    remap label coordinates into the augmented image.
    """
    oh = img.height
    ow = img.width

    # maximum crop jitter in pixels
    dw = int(ow * jitter)
    dh = int(oh * jitter)

    pad_left = random.randint(-dw, dw)
    pad_right = random.randint(-dw, dw)
    pad_top = random.randint(-dh, dh)
    pad_bot = random.randint(-dh, dh)

    swidth = ow - pad_left - pad_right
    sheight = oh - pad_top - pad_bot

    # crop scale relative to the original image
    sx = float(swidth) / ow
    sy = float(sheight) / oh

    flip = random.randint(1, 10000) % 2
    cropped = img.crop((pad_left, pad_top, pad_left + swidth - 1, pad_top + sheight - 1))

    # crop offset in crop-normalized coordinates
    dx = (float(pad_left) / ow) / sx
    dy = (float(pad_top) / oh) / sy

    sized = cropped.resize(shape)
    if flip:
        sized = sized.transpose(Image.FLIP_LEFT_RIGHT)
    jittered = random_distort_image(sized, hue, saturation, exposure)

    return jittered, flip, dx, dy, sx, sy
|
||||
|
||||
def fill_truth_detection(labpath, w, h, flip, dx, dy, sx, sy):
    """Read a label file and remap its 9 keypoints per object into the
    augmented image frame.

    Each label row is [class, x0,y0 ... x8,y8, bw, bh] with coordinates
    normalized to [0,1]. Coordinates are scaled by (sx, sy), shifted by
    (-dx, -dy), clamped to [0, 0.999], and mirrored when `flip` is set.
    The 2D extent (slots 19/20) is recomputed from corners x1..x8 (the
    centroid x0/y0 is excluded, as in the original).

    w, h are accepted for signature compatibility but unused.
    Returns a flat numpy array of shape (50*21,); at most 50 objects.

    The original's 9x copy-pasted per-keypoint code is collapsed into loops;
    behavior is unchanged.
    """
    max_boxes = 50
    num_keypoints = 9
    label = np.zeros((max_boxes, 21))
    if os.path.getsize(labpath):
        bs = np.loadtxt(labpath)
        if bs is None:
            return np.reshape(label, (-1))
        bs = np.reshape(bs, (-1, 21))
        cc = 0
        for i in range(bs.shape[0]):
            # remap and clamp every keypoint into the crop frame
            for k in range(num_keypoints):
                xi = 1 + 2 * k
                yi = 2 + 2 * k
                bs[i][xi] = min(0.999, max(0, bs[i][xi] * sx - dx))
                bs[i][yi] = min(0.999, max(0, bs[i][yi] * sy - dy))

            # 2D extent from the 8 box corners (keypoints 1..8)
            xs = [bs[i][1 + 2 * k] for k in range(1, num_keypoints)]
            ys = [bs[i][2 + 2 * k] for k in range(1, num_keypoints)]
            bs[i][19] = max(xs) - min(xs)
            bs[i][20] = max(ys) - min(ys)

            if flip:
                # mirror all x coordinates (0.999 is the clamp ceiling)
                for k in range(num_keypoints):
                    xi = 1 + 2 * k
                    bs[i][xi] = 0.999 - bs[i][xi]

            label[cc] = bs[i]
            cc += 1
            if cc >= max_boxes:
                break

    return np.reshape(label, (-1))
|
||||
|
||||
def change_background(img, mask, bg):
    """Composite the masked foreground of `img` over `bg`:
    out = img*mask + bg*(1-mask), per channel (mask assumed 0/255)."""
    ow, oh = img.size
    bg = bg.resize((ow, oh)).convert('RGB')

    out_channels = list(Image.new(img.mode, img.size).split())
    fg_channels = list(img.split())
    bg_channels = list(bg.split())
    mask_channels = list(mask.split())

    for c in range(len(fg_channels)):
        keep = mask_channels[c].point(lambda i: i / 255)
        drop = mask_channels[c].point(lambda i: 1 - i / 255)
        out_channels[c] = ImageMath.eval("a * c + b * d", a=fg_channels[c], b=bg_channels[c], c=keep, d=drop).convert('L')

    return Image.merge(img.mode, tuple(out_channels))
|
||||
|
||||
def shifted_data_augmentation_with_mask(img, mask, shape, jitter, hue, saturation, exposure):
    """Randomly crop, resize, shift and flip `img` together with its `mask`.

    Unlike data_augmentation_with_mask, the resized crop is additionally
    translated by a random pixel offset via ImageChops.offset (which wraps
    around the image borders).

    Returns (img, mask, flip, dx, dy, sx, sy) where dx/dy/sx/sy describe the
    coordinate transform to apply to the labels.

    NOTE(review): hue/saturation/exposure are accepted but never used here —
    presumably color jitter happens elsewhere; confirm this is intentional.
    """
    ow, oh = img.size

    # maximum crop jitter in pixels
    dw =int(ow*jitter)
    dh =int(oh*jitter)

    pleft  = random.randint(-dw, dw)
    pright = random.randint(-dw, dw)
    ptop   = random.randint(-dh, dh)
    pbot   = random.randint(-dh, dh)

    # size of the crop window
    swidth =  ow - pleft - pright
    sheight = oh - ptop - pbot

    # crop scale relative to the original image
    sx = float(swidth)  / ow
    sy = float(sheight) / oh

    flip = random.randint(1,10000)%2

    cropped = img.crop( (pleft, ptop, pleft + swidth - 1, ptop + sheight - 1))
    mask_cropped = mask.crop( (pleft, ptop, pleft + swidth - 1, ptop + sheight - 1))

    cw, ch = cropped.size
    # extra translation applied after resizing; the label offset below
    # accounts for it in target-shape coordinates
    shift_x = random.randint(-80, 80)
    shift_y = random.randint(-80, 80)
    dx = (float(pleft)/ow)/sx - (float(shift_x)/shape[0]) # FIX HERE
    dy = (float(ptop) /oh)/sy - (float(shift_y)/shape[1]) # FIX HERE

    # dx = (float(pleft)/ow)/sx - (float(shift_x)/ow)
    # dy = (float(ptop) /oh)/sy - (float(shift_y)/oh)

    sized = cropped.resize(shape)
    mask_sized = mask_cropped.resize(shape)

    # NOTE(review): ImageChops.offset wraps pixels around the border, so a
    # large shift can reintroduce the object on the opposite side.
    sized = ImageChops.offset(sized, shift_x, shift_y)
    mask_sized = ImageChops.offset(mask_sized, shift_x, shift_y)

    if flip:
        sized = sized.transpose(Image.FLIP_LEFT_RIGHT)
        mask_sized = mask_sized.transpose(Image.FLIP_LEFT_RIGHT)

    img = sized
    mask = mask_sized

    return img, mask, flip, dx,dy,sx,sy
|
||||
|
||||
def data_augmentation_with_mask(img, mask, shape, jitter, hue, saturation, exposure):
    """Randomly crop, resize to `shape`, and flip `img` together with its
    `mask` (no shift, no color jitter — hue/saturation/exposure are unused).

    Returns (img, mask, flip, dx, dy, sx, sy) for remapping labels.
    """
    ow, oh = img.size

    # maximum crop jitter in pixels
    dw = int(ow * jitter)
    dh = int(oh * jitter)

    pad_left = random.randint(-dw, dw)
    pad_right = random.randint(-dw, dw)
    pad_top = random.randint(-dh, dh)
    pad_bot = random.randint(-dh, dh)

    swidth = ow - pad_left - pad_right
    sheight = oh - pad_top - pad_bot

    # crop scale relative to the original image
    sx = float(swidth) / ow
    sy = float(sheight) / oh

    flip = random.randint(1, 10000) % 2
    crop_box = (pad_left, pad_top, pad_left + swidth - 1, pad_top + sheight - 1)
    cropped = img.crop(crop_box)
    mask_cropped = mask.crop(crop_box)

    # crop offset in crop-normalized coordinates
    dx = (float(pad_left) / ow) / sx
    dy = (float(pad_top) / oh) / sy

    sized = cropped.resize(shape)
    mask_sized = mask_cropped.resize(shape)

    if flip:
        sized = sized.transpose(Image.FLIP_LEFT_RIGHT)
        mask_sized = mask_sized.transpose(Image.FLIP_LEFT_RIGHT)

    return sized, mask_sized, flip, dx, dy, sx, sy
|
||||
|
||||
def superimpose_masked_imgs(masked_img, mask, total_mask):
    """Paste `masked_img` (already background-zeroed) over the running
    composite `total_mask`: out = masked_img*mask + total_mask*(1-mask)."""
    ow, oh = masked_img.size
    total_mask = total_mask.resize((ow, oh)).convert('RGB')

    out_channels = list(Image.new(masked_img.mode, masked_img.size).split())
    fg_channels = list(masked_img.split())
    acc_channels = list(total_mask.split())
    mask_channels = list(mask.split())

    for c in range(len(fg_channels)):
        keep = mask_channels[c].point(lambda i: i / 255)
        drop = mask_channels[c].point(lambda i: 1 - i / 255)
        out_channels[c] = ImageMath.eval("a * c + b * d", a=fg_channels[c], b=acc_channels[c], c=keep, d=drop).convert('L')

    return Image.merge(masked_img.mode, tuple(out_channels))
|
||||
|
||||
def superimpose_masks(mask, total_mask):
    """Accumulate `mask` into the running union `total_mask`:
    out = mask + total_mask*(1-mask), per channel (mask assumed 0/255)."""
    # bg: total_mask
    ow, oh = mask.size
    total_mask = total_mask.resize((ow, oh)).convert('RGB')

    out_channels = list(Image.new(mask.mode, mask.size).split())
    acc_channels = list(total_mask.split())
    mask_channels = list(mask.split())

    for c in range(len(mask_channels)):
        # raw mask values (0..255) pass through; accumulator kept where mask is off
        keep = mask_channels[c].point(lambda i: i)
        drop = mask_channels[c].point(lambda i: 1 - i / 255)
        out_channels[c] = ImageMath.eval("c + b * d", b=acc_channels[c], c=keep, d=drop).convert('L')

    return Image.merge(mask.mode, tuple(out_channels))
|
||||
|
||||
def augment_objects(imgpath, objname, add_objs, shape, jitter, hue, saturation, exposure):
    """Build a multi-object training scene: augment the primary object from
    `imgpath`, then paste one random instance of every object in `add_objs`
    wherever it overlaps the existing scene by less than 20%.

    Returns (total_masked_img, flat 50x21 label array, total_mask).

    NOTE(review): the train-list path below is hard-coded to an absolute
    cluster path ('/cvlabdata1/...') — this must be adapted per machine.
    NOTE(review): the `while not successful` retry loop has no attempt cap;
    if no non-overlapping placement exists it spins forever — confirm.
    """

    # masks are considered "on" above this per-channel pixel value
    pixelThreshold = 200

    random.shuffle(add_objs)
    labpath = imgpath.replace('images', 'labels').replace('JPEGImages', 'labels').replace('.jpg', '.txt').replace('.png','.txt')
    maskpath = imgpath.replace('JPEGImages', 'mask').replace('/00', '/').replace('.jpg', '.png')

    # Read the image and the mask
    img = Image.open(imgpath).convert('RGB')
    iw, ih = img.size
    mask = Image.open(maskpath).convert('RGB')
    img,mask,flip,dx,dy,sx,sy = shifted_data_augmentation_with_mask(img, mask, shape, jitter, hue, saturation, exposure)
    label = fill_truth_detection(labpath, iw, ih, flip, dx, dy, 1./sx, 1./sy)
    total_label = np.reshape(label, (-1, 21))

    # Mask the background
    masked_img = mask_background(img, mask)
    mask = mask.resize(shape)
    masked_img = masked_img.resize(shape)

    # Initialize the total mask and total masked image
    total_mask = mask
    total_masked_img = masked_img
    count = 1  # row 0 of total_label holds the primary object
    for obj in add_objs:
        successful = False
        while not successful:

            # pick a random training image of the additional object
            objpath = '/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/' + obj + '/train.txt'
            with open(objpath, 'r') as objfile:
                objlines = objfile.readlines()
            rand_index = random.randint(0, len(objlines) - 1)
            obj_rand_img_path = objlines[rand_index].rstrip()
            obj_rand_mask_path = obj_rand_img_path.replace('JPEGImages', 'mask').replace('/00', '/').replace('.jpg', '.png')
            obj_rand_lab_path = obj_rand_img_path.replace('images', 'labels').replace('JPEGImages', 'labels').replace('.jpg', '.txt').replace('.png','.txt')

            obj_rand_img = Image.open(obj_rand_img_path).convert('RGB')
            obj_rand_mask = Image.open(obj_rand_mask_path).convert('RGB')
            obj_rand_masked_img = mask_background(obj_rand_img, obj_rand_mask)

            obj_rand_masked_img,obj_rand_mask,flip,dx,dy,sx,sy = data_augmentation_with_mask(obj_rand_masked_img, obj_rand_mask, shape, jitter, hue, saturation, exposure)
            obj_rand_label = fill_truth_detection(obj_rand_lab_path, iw, ih, flip, dx, dy, 1./sx, 1./sy)

            # compute intersection (ratio of the object part intersecting with other object parts over the area of the object)
            xx = np.array(obj_rand_mask)
            xx = np.where(xx > pixelThreshold, 1, 0)
            yy = np.array(total_mask)
            yy = np.where(yy > pixelThreshold, 1, 0)
            intersection = (xx * yy)
            # empty mask (area ~0): retry with a different sample
            if (np.sum(xx) < 0.01) and (np.sum(xx) > -0.01):
                successful = False
                continue
            intersection_ratio = float(np.sum(intersection)) / float(np.sum(xx))
            if intersection_ratio < 0.2:
                successful = True
                total_mask = superimpose_masks(obj_rand_mask, total_mask) # total_mask + obj_rand_mask
                total_masked_img = superimpose_masked_imgs(obj_rand_masked_img, obj_rand_mask, total_masked_img) # total_masked_img + obj_rand_masked_img
                obj_rand_label = np.reshape(obj_rand_label, (-1, 21))
                total_label[count, :] = obj_rand_label[0, :]
                count = count + 1
            else:
                successful = False

    # re-paste the primary object so it is not occluded by the added ones
    total_masked_img = superimpose_masked_imgs(masked_img, mask, total_masked_img)

    return total_masked_img, np.reshape(total_label, (-1)), total_mask
|
||||
|
||||
def load_data_detection(imgpath, shape, jitter, hue, saturation, exposure, bgpath):
    """Load one multi-object augmented training sample.

    Determines the primary object from the directory layout
    (LINEMOD/<objname>/<imgdir>/<file>), pastes additional objects into the
    scene, and composites the result over the background image at `bgpath`.

    Returns (img, label) with label a flat 50x21 truth array.
    The original also built an unused reshaped copy of the label; removed.
    """
    # Read the background image
    bg = Image.open(bgpath).convert('RGB')

    # Understand which object it is and get the neighboring objects
    dirname = os.path.dirname(os.path.dirname(imgpath))  # dir of dir of file
    objname = os.path.basename(dirname)
    add_objs = get_add_objs(objname)

    # Add additional objects in the scene, apply data augmentation on the objects
    total_masked_img, label, total_mask = augment_objects(imgpath, objname, add_objs, shape, jitter, hue, saturation, exposure)
    img = change_background(total_masked_img, total_mask, bg)
    return img, label
|
||||
|
|
@ -0,0 +1,309 @@
|
|||
import time
|
||||
import torch
|
||||
import math
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.autograd import Variable
|
||||
from utils import *
|
||||
|
||||
def build_targets(pred_corners, target, anchors, num_anchors, num_classes, nH, nW, noobject_scale, object_scale, sil_thresh, seen):
    """Build regression/confidence targets for the 9-keypoint region loss.

    pred_corners: (nB*nA*nH*nW, 18) predicted keypoints, normalized to [0,1].
    target: (nB, 50*21) ground truth; each 21-slot row is
        [class, x0,y0 ... x8,y8, w, h] with coordinates normalized to [0,1]
        (keypoint 0 is the centroid, 1..8 the 3D box corners).
    Returns (nGT, nCorrect, coord_mask, conf_mask, cls_mask,
             tx0..tx8, ty0..ty8, tconf, tcls) — same tuple as the original;
    the 9x copy-pasted per-keypoint code is collapsed into loops, and
    anchor_step uses integer division so anchor indexing works on Python 3.
    """
    nB = target.size(0)
    nA = num_anchors
    nC = num_classes
    # anchors is a flat [w0, h0, w1, h1, ...] list
    anchor_step = len(anchors) // num_anchors
    conf_mask = torch.ones(nB, nA, nH, nW) * noobject_scale
    coord_mask = torch.zeros(nB, nA, nH, nW)
    cls_mask = torch.zeros(nB, nA, nH, nW)
    # one x/y target plane per keypoint (centroid + 8 corners)
    txs = [torch.zeros(nB, nA, nH, nW) for _ in range(9)]
    tys = [torch.zeros(nB, nA, nH, nW) for _ in range(9)]
    tconf = torch.zeros(nB, nA, nH, nW)
    tcls = torch.zeros(nB, nA, nH, nW)

    nAnchors = nA * nH * nW
    nPixels = nH * nW
    # Silence the no-object penalty wherever some prediction already matches
    # a ground truth better than sil_thresh.
    for b in range(nB):
        cur_pred_corners = pred_corners[b * nAnchors:(b + 1) * nAnchors].t()
        cur_confs = torch.zeros(nAnchors)
        for t in range(50):
            if target[b][t * 21 + 1] == 0:
                break
            # the 18 normalized keypoint coordinates of this ground truth
            gt_coords = [target[b][t * 21 + 1 + k] for k in range(18)]
            cur_gt_corners = torch.FloatTensor(gt_coords).repeat(nAnchors, 1).t()  # 18 x nAnchors
            # keep the best confidence over all ground truths per anchor cell
            cur_confs = torch.max(cur_confs, corner_confidences9(cur_pred_corners, cur_gt_corners))
        conf_mask[b][cur_confs > sil_thresh] = 0

    if seen < -1:  # 6400: disabled warm-up that would bias coords to cell centers
        for k in range(9):
            txs[k].fill_(0.5)
            tys[k].fill_(0.5)
        coord_mask.fill_(1)

    nGT = 0
    nCorrect = 0
    for b in range(nB):
        for t in range(50):
            if target[b][t * 21 + 1] == 0:
                break
            nGT = nGT + 1

            # keypoints in grid units; the centroid cell is responsible
            gx = [target[b][t * 21 + 1 + 2 * k] * nW for k in range(9)]
            gy = [target[b][t * 21 + 2 + 2 * k] * nH for k in range(9)]
            gi0 = int(gx[0])
            gj0 = int(gy[0])

            # pick the anchor whose w/h best matches the gt 2D extent
            best_iou = 0.0
            best_n = -1
            gw = target[b][t * 21 + 19] * nW
            gh = target[b][t * 21 + 20] * nH
            gt_box = [0, 0, gw, gh]
            for n in range(nA):
                aw = anchors[anchor_step * n]
                ah = anchors[anchor_step * n + 1]
                anchor_box = [0, 0, aw, ah]
                iou = bbox_iou(anchor_box, gt_box, x1y1x2y2=False)
                if iou > best_iou:
                    best_iou = iou
                    best_n = n

            gt_corners = [target[b][t * 21 + 1 + k] for k in range(18)]
            pred_box = pred_corners[b * nAnchors + best_n * nPixels + gj0 * nW + gi0]
            conf = corner_confidence9(gt_corners, pred_box)
            coord_mask[b][best_n][gj0][gi0] = 1
            cls_mask[b][best_n][gj0][gi0] = 1
            conf_mask[b][best_n][gj0][gi0] = object_scale
            for k in range(9):
                # offsets of each keypoint w.r.t. the responsible cell
                txs[k][b][best_n][gj0][gi0] = gx[k] - gi0
                tys[k][b][best_n][gj0][gi0] = gy[k] - gj0
            tconf[b][best_n][gj0][gi0] = conf
            tcls[b][best_n][gj0][gi0] = target[b][t * 21]

            if conf > 0.5:
                nCorrect = nCorrect + 1

    return (nGT, nCorrect, coord_mask, conf_mask, cls_mask,
            txs[0], txs[1], txs[2], txs[3], txs[4], txs[5], txs[6], txs[7], txs[8],
            tys[0], tys[1], tys[2], tys[3], tys[4], tys[5], tys[6], tys[7], tys[8],
            tconf, tcls)
|
||||
|
||||
class RegionLoss(nn.Module):
    """YOLO-style region loss for 9-control-point pose regression.

    Per anchor and grid cell the network predicts 19 + nC channels:
    18 coordinate channels (x/y offsets of 9 control points: 8 box corners
    plus the centroid), 1 objectness-confidence channel, and nC class scores.

    NOTE(review): written against the PyTorch 0.3-era API (``Variable``,
    ``F.sigmoid``, ``.data[0]``) and hard-codes CUDA tensors throughout —
    it will not run on CPU as-is.
    """

    def __init__(self, num_classes=0, anchors=[], num_anchors=5):
        # NOTE(review): mutable default ``anchors=[]`` — harmless here because
        # the list is only stored and read, never mutated in this class.
        super(RegionLoss, self).__init__()
        self.num_classes = num_classes    # number of object classes (nC)
        self.anchors = anchors            # flat list of anchor parameters
        self.num_anchors = num_anchors    # anchors per grid cell (nA)
        # Values per anchor; integer under py2, float under py3 — TODO confirm
        # intended semantics if this is ever used as an index/count.
        self.anchor_step = len(anchors)/num_anchors
        self.coord_scale = 1              # weight of the coordinate loss
        self.noobject_scale = 1           # confidence weight, cells w/o object
        self.object_scale = 5             # confidence weight, cells with object
        self.class_scale = 1              # weight of the classification loss
        self.thresh = 0.6                 # threshold used by build_targets
        self.seen = 0                     # training samples processed so far

    def forward(self, output, target):
        """Compute the total loss for one batch.

        output: raw network feature map of shape (nB, nA*(19+nC), nH, nW).
        target: ground-truth tensor; build_targets reads 21 values per object
                (class id + 9 x/y pairs + padding — see target[b][t*21+...]).
        Returns a scalar Variable (sum of coordinate, confidence and class
        losses). Also prints a per-batch progress line as a side effect.
        """
        # Parameters
        t0 = time.time()
        nB = output.data.size(0)   # batch size
        nA = self.num_anchors      # anchors per cell
        nC = self.num_classes      # classes
        nH = output.data.size(2)   # grid height
        nW = output.data.size(3)   # grid width

        # Activation: split the channel dimension into the 9 point pairs,
        # confidence and class scores. Only the centroid (x0, y0) and the
        # confidence are squashed with a sigmoid; the 8 corner offsets are
        # left unconstrained so they may fall outside their grid cell.
        output = output.view(nB, nA, (19+nC), nH, nW)
        x0 = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))
        y0 = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))
        x1 = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)
        y1 = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)
        x2 = output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW)
        y2 = output.index_select(2, Variable(torch.cuda.LongTensor([5]))).view(nB, nA, nH, nW)
        x3 = output.index_select(2, Variable(torch.cuda.LongTensor([6]))).view(nB, nA, nH, nW)
        y3 = output.index_select(2, Variable(torch.cuda.LongTensor([7]))).view(nB, nA, nH, nW)
        x4 = output.index_select(2, Variable(torch.cuda.LongTensor([8]))).view(nB, nA, nH, nW)
        y4 = output.index_select(2, Variable(torch.cuda.LongTensor([9]))).view(nB, nA, nH, nW)
        x5 = output.index_select(2, Variable(torch.cuda.LongTensor([10]))).view(nB, nA, nH, nW)
        y5 = output.index_select(2, Variable(torch.cuda.LongTensor([11]))).view(nB, nA, nH, nW)
        x6 = output.index_select(2, Variable(torch.cuda.LongTensor([12]))).view(nB, nA, nH, nW)
        y6 = output.index_select(2, Variable(torch.cuda.LongTensor([13]))).view(nB, nA, nH, nW)
        x7 = output.index_select(2, Variable(torch.cuda.LongTensor([14]))).view(nB, nA, nH, nW)
        y7 = output.index_select(2, Variable(torch.cuda.LongTensor([15]))).view(nB, nA, nH, nW)
        x8 = output.index_select(2, Variable(torch.cuda.LongTensor([16]))).view(nB, nA, nH, nW)
        y8 = output.index_select(2, Variable(torch.cuda.LongTensor([17]))).view(nB, nA, nH, nW)
        conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([18]))).view(nB, nA, nH, nW))
        cls = output.index_select(2, Variable(torch.linspace(19,19+nC-1,nC).long().cuda()))
        # Flatten class scores to one row per (batch, anchor, cell) prediction.
        cls = cls.view(nB*nA, nC, nH*nW).transpose(1,2).contiguous().view(nB*nA*nH*nW, nC)
        t1 = time.time()

        # Create pred boxes: absolute, grid-normalized coordinates for all
        # 9 points of every prediction (offsets + cell index, divided by the
        # grid size so values are in image-relative units).
        pred_corners = torch.cuda.FloatTensor(18, nB*nA*nH*nW)
        grid_x = torch.linspace(0, nW-1, nW).repeat(nH,1).repeat(nB*nA, 1, 1).view(nB*nA*nH*nW).cuda()
        grid_y = torch.linspace(0, nH-1, nH).repeat(nW,1).t().repeat(nB*nA, 1, 1).view(nB*nA*nH*nW).cuda()
        pred_corners[0] = (x0.data + grid_x) / nW
        pred_corners[1] = (y0.data + grid_y) / nH
        pred_corners[2] = (x1.data + grid_x) / nW
        pred_corners[3] = (y1.data + grid_y) / nH
        pred_corners[4] = (x2.data + grid_x) / nW
        pred_corners[5] = (y2.data + grid_y) / nH
        pred_corners[6] = (x3.data + grid_x) / nW
        pred_corners[7] = (y3.data + grid_y) / nH
        pred_corners[8] = (x4.data + grid_x) / nW
        pred_corners[9] = (y4.data + grid_y) / nH
        pred_corners[10] = (x5.data + grid_x) / nW
        pred_corners[11] = (y5.data + grid_y) / nH
        pred_corners[12] = (x6.data + grid_x) / nW
        pred_corners[13] = (y6.data + grid_y) / nH
        pred_corners[14] = (x7.data + grid_x) / nW
        pred_corners[15] = (y7.data + grid_y) / nH
        pred_corners[16] = (x8.data + grid_x) / nW
        pred_corners[17] = (y8.data + grid_y) / nH
        # build_targets runs on CPU, so move the predictions off the GPU.
        gpu_matrix = pred_corners.transpose(0,1).contiguous().view(-1,18)
        pred_corners = convert2cpu(gpu_matrix)
        t2 = time.time()

        # Build targets: per-cell regression targets, masks selecting which
        # cells contribute to each loss term, and recall statistics.
        nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx0, tx1, tx2, tx3, tx4, tx5, tx6, tx7, tx8, ty0, ty1, ty2, ty3, ty4, ty5, ty6, ty7, ty8, tconf, tcls = \
                build_targets(pred_corners, target.data, self.anchors, nA, nC, nH, nW, self.noobject_scale, self.object_scale, self.thresh, self.seen)
        cls_mask = (cls_mask == 1)
        # Count of predictions above a fixed 0.25 confidence (diagnostic only).
        nProposals = int((conf > 0.25).sum().data[0])
        tx0 = Variable(tx0.cuda())
        ty0 = Variable(ty0.cuda())
        tx1 = Variable(tx1.cuda())
        ty1 = Variable(ty1.cuda())
        tx2 = Variable(tx2.cuda())
        ty2 = Variable(ty2.cuda())
        tx3 = Variable(tx3.cuda())
        ty3 = Variable(ty3.cuda())
        tx4 = Variable(tx4.cuda())
        ty4 = Variable(ty4.cuda())
        tx5 = Variable(tx5.cuda())
        ty5 = Variable(ty5.cuda())
        tx6 = Variable(tx6.cuda())
        ty6 = Variable(ty6.cuda())
        tx7 = Variable(tx7.cuda())
        ty7 = Variable(ty7.cuda())
        tx8 = Variable(tx8.cuda())
        ty8 = Variable(ty8.cuda())
        tconf = Variable(tconf.cuda())
        # Keep only the class targets of cells that actually hold an object.
        tcls = Variable(tcls.view(-1)[cls_mask].long().cuda())
        coord_mask = Variable(coord_mask.cuda())
        # sqrt: conf_mask holds squared weights; MSE squares the mask again.
        conf_mask = Variable(conf_mask.cuda().sqrt())
        cls_mask = Variable(cls_mask.view(-1, 1).repeat(1,nC).cuda())
        cls = cls[cls_mask].view(-1, nC)
        t3 = time.time()

        # Create loss: masked sum-squared error per coordinate channel
        # (size_average=False -> summed, then halved).
        loss_x0 = self.coord_scale * nn.MSELoss(size_average=False)(x0*coord_mask, tx0*coord_mask)/2.0
        loss_y0 = self.coord_scale * nn.MSELoss(size_average=False)(y0*coord_mask, ty0*coord_mask)/2.0
        loss_x1 = self.coord_scale * nn.MSELoss(size_average=False)(x1*coord_mask, tx1*coord_mask)/2.0
        loss_y1 = self.coord_scale * nn.MSELoss(size_average=False)(y1*coord_mask, ty1*coord_mask)/2.0
        loss_x2 = self.coord_scale * nn.MSELoss(size_average=False)(x2*coord_mask, tx2*coord_mask)/2.0
        loss_y2 = self.coord_scale * nn.MSELoss(size_average=False)(y2*coord_mask, ty2*coord_mask)/2.0
        loss_x3 = self.coord_scale * nn.MSELoss(size_average=False)(x3*coord_mask, tx3*coord_mask)/2.0
        loss_y3 = self.coord_scale * nn.MSELoss(size_average=False)(y3*coord_mask, ty3*coord_mask)/2.0
        loss_x4 = self.coord_scale * nn.MSELoss(size_average=False)(x4*coord_mask, tx4*coord_mask)/2.0
        loss_y4 = self.coord_scale * nn.MSELoss(size_average=False)(y4*coord_mask, ty4*coord_mask)/2.0
        loss_x5 = self.coord_scale * nn.MSELoss(size_average=False)(x5*coord_mask, tx5*coord_mask)/2.0
        loss_y5 = self.coord_scale * nn.MSELoss(size_average=False)(y5*coord_mask, ty5*coord_mask)/2.0
        loss_x6 = self.coord_scale * nn.MSELoss(size_average=False)(x6*coord_mask, tx6*coord_mask)/2.0
        loss_y6 = self.coord_scale * nn.MSELoss(size_average=False)(y6*coord_mask, ty6*coord_mask)/2.0
        loss_x7 = self.coord_scale * nn.MSELoss(size_average=False)(x7*coord_mask, tx7*coord_mask)/2.0
        loss_y7 = self.coord_scale * nn.MSELoss(size_average=False)(y7*coord_mask, ty7*coord_mask)/2.0
        loss_x8 = self.coord_scale * nn.MSELoss(size_average=False)(x8*coord_mask, tx8*coord_mask)/2.0
        loss_y8 = self.coord_scale * nn.MSELoss(size_average=False)(y8*coord_mask, ty8*coord_mask)/2.0
        loss_conf = nn.MSELoss(size_average=False)(conf*conf_mask, tconf*conf_mask)/2.0
        loss_x = loss_x0 + loss_x1 + loss_x2 + loss_x3 + loss_x4 + loss_x5 + loss_x6 + loss_x7 + loss_x8
        loss_y = loss_y0 + loss_y1 + loss_y2 + loss_y3 + loss_y4 + loss_y5 + loss_y6 + loss_y7 + loss_y8

        loss_cls = self.class_scale * nn.CrossEntropyLoss(size_average=False)(cls, tcls)
        loss = loss_x + loss_y + loss_conf + loss_cls
        print('%d: nGT %d, recall %d, proposals %d, loss: x0: %f x %f, y0: %f y %f, conf %f, cls %f, total %f' % (self.seen, nGT, nCorrect, nProposals, loss_x0.data[0], loss_x.data[0], loss_y0.data[0], loss_y.data[0], loss_conf.data[0], loss_cls.data[0], loss.data[0]))
        #else:
        #    loss = loss_x + loss_y + loss_conf
        #    print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, conf %f, total %f' % (self.seen, nGT, nCorrect, nProposals, loss_x.data[0], loss_y.data[0], loss_conf.data[0], loss.data[0]))

        t4 = time.time()

        # Disabled timing diagnostics; flip the constant to profile the stages.
        if False:
            print('-----------------------------------')
            print(' activation : %f' % (t1 - t0))
            print(' create pred_corners : %f' % (t2 - t1))
            print(' build targets : %f' % (t3 - t2))
            print(' create loss : %f' % (t4 - t3))
            print(' total : %f' % (t4 - t0))

        return loss
|
|
@ -0,0 +1,424 @@
|
|||
from __future__ import print_function
|
||||
import os
|
||||
os.sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
import sys
|
||||
import time
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.optim as optim
|
||||
import torch.backends.cudnn as cudnn
|
||||
import numpy as np
|
||||
import random
|
||||
import math
|
||||
import shutil
|
||||
from torchvision import datasets, transforms
|
||||
from torch.autograd import Variable # Useful info about autograd: http://pytorch.org/docs/master/notes/autograd.html
|
||||
|
||||
from darknet_multi import Darknet
|
||||
from MeshPly import MeshPly
|
||||
from utils import *
|
||||
from cfg import parse_cfg
|
||||
import dataset_multi
|
||||
from region_loss_multi import RegionLoss
|
||||
|
||||
|
||||
# Create new directory
|
||||
def makedirs(path):
    """Create directory *path* (and missing parents) if it does not exist.

    The original checked ``os.path.exists`` and then called ``os.makedirs``,
    which races when another process creates the directory in between
    (TOCTOU). Here the creation is attempted unconditionally and the
    "already exists" case is tolerated; any other ``OSError`` (permissions,
    path is a file, ...) still propagates.
    """
    try:
        os.makedirs(path)
    except OSError:
        # Benign only if the directory now exists; re-raise real failures.
        if not os.path.isdir(path):
            raise
|
||||
|
||||
# Adjust learning rate during training, learning schedule can be changed in network config file
|
||||
def adjust_learning_rate(optimizer, batch):
    """Step-decay learning-rate schedule.

    Starting from the module-level ``learning_rate``, multiply by each entry
    of ``scales`` whose corresponding ``steps`` boundary has been passed by
    *batch* (stopping at the first boundary not yet reached, or exactly at a
    boundary). The per-parameter-group rate is divided by ``batch_size``,
    matching how the optimizer was constructed. Returns the (undivided) rate.
    """
    lr = learning_rate
    for idx, boundary in enumerate(steps):
        if batch < boundary:
            break
        # Missing scale entries default to 1 (no change).
        lr = lr * (scales[idx] if idx < len(scales) else 1)
        if batch == boundary:
            break
    for group in optimizer.param_groups:
        group['lr'] = lr / batch_size
    return lr
|
||||
|
||||
def train(epoch):
    """Run one full training epoch over ``trainlist``.

    Builds a fresh DataLoader each call (so the dataset's ``seen`` counter is
    re-read from the model), adjusts the learning rate per batch, and performs
    the usual forward / loss / backward / step cycle.

    Reads module-level globals: trainlist, init_width, init_height,
    batch_size, num_workers, bg_file_names, kwargs, model, optimizer,
    region_loss, use_cuda, training_iters, training_losses.
    Mutates the global ``processed_batches`` and appends to the loss logs.

    Returns the global iteration index of the last processed batch
    (epoch * batches_per_epoch + niter - 1), used to tag test results.
    """
    global processed_batches

    # Initialize timer
    t0 = time.time()

    # Get the dataloader for training dataset
    train_loader = torch.utils.data.DataLoader(dataset_multi.listDataset(trainlist, shape=(init_width, init_height),
                                                                         shuffle=True,
                                                                         transform=transforms.Compose([transforms.ToTensor(),]),
                                                                         train=True,
                                                                         seen=model.module.seen,
                                                                         batch_size=batch_size,
                                                                         num_workers=num_workers, bg_file_names=bg_file_names),
                                               batch_size=batch_size, shuffle=False, **kwargs)

    # TRAINING: set the rate for this epoch's starting batch count.
    lr = adjust_learning_rate(optimizer, processed_batches)
    logging('epoch %d, processed %d samples, lr %f' % (epoch, epoch * len(train_loader.dataset), lr))
    # Start training
    model.train()
    t1 = time.time()
    avg_time = torch.zeros(9)  # accumulators for the (disabled) stage timings
    niter = 0                  # batches completed in this epoch
    # Iterate through batches
    for batch_idx, (data, target) in enumerate(train_loader):
        t2 = time.time()
        # adjust learning rate (schedule is in units of processed batches)
        adjust_learning_rate(optimizer, processed_batches)
        processed_batches = processed_batches + 1
        # Pass the data to GPU
        if use_cuda:
            data = data.cuda()
        t3 = time.time()
        # Wrap tensors in Variable class for automatic differentiation
        data, target = Variable(data), Variable(target)
        t4 = time.time()
        # Zero the gradients before running the backward pass
        optimizer.zero_grad()
        t5 = time.time()
        # Forward pass
        output = model(data)
        t6 = time.time()
        # Keep the loss module's sample counter in sync with what was seen.
        region_loss.seen = region_loss.seen + data.data.size(0)
        # Compute loss, grow an array of losses for saving later on
        loss = region_loss(output, target)
        training_iters.append(epoch * math.ceil(len(train_loader.dataset) / float(batch_size) ) + niter)
        training_losses.append(convert2cpu(loss.data))
        niter += 1
        t7 = time.time()
        # Backprop: compute gradient of the loss with respect to model parameters
        loss.backward()
        t8 = time.time()
        # Update weights
        optimizer.step()
        t9 = time.time()
        # Print time statistics — disabled; flip the constant to profile.
        if False and batch_idx > 1:
            avg_time[0] = avg_time[0] + (t2-t1)
            avg_time[1] = avg_time[1] + (t3-t2)
            avg_time[2] = avg_time[2] + (t4-t3)
            avg_time[3] = avg_time[3] + (t5-t4)
            avg_time[4] = avg_time[4] + (t6-t5)
            avg_time[5] = avg_time[5] + (t7-t6)
            avg_time[6] = avg_time[6] + (t8-t7)
            avg_time[7] = avg_time[7] + (t9-t8)
            avg_time[8] = avg_time[8] + (t9-t1)
            print('-------------------------------')
            print(' load data : %f' % (avg_time[0]/(batch_idx)))
            print(' cpu to cuda : %f' % (avg_time[1]/(batch_idx)))
            print('cuda to variable : %f' % (avg_time[2]/(batch_idx)))
            print(' zero_grad : %f' % (avg_time[3]/(batch_idx)))
            print(' forward feature : %f' % (avg_time[4]/(batch_idx)))
            print(' forward loss : %f' % (avg_time[5]/(batch_idx)))
            print(' backward : %f' % (avg_time[6]/(batch_idx)))
            print(' step : %f' % (avg_time[7]/(batch_idx)))
            print(' total : %f' % (avg_time[8]/(batch_idx)))
            t1 = time.time()
        t1 = time.time()
    # Global iteration index of the last batch of this epoch.
    return epoch * math.ceil(len(train_loader.dataset) / float(batch_size) ) + niter - 1
|
||||
|
||||
def eval(niter, datacfg, cfgfile):
    """Evaluate the current global ``model`` on one OCCLUSION object.

    Runs the validation split named in *datacfg*, picks for every ground-truth
    object the highest-confidence prediction of the matching class, recovers
    the 6D pose with PnP from the 9 predicted 2D control points, and logs
    2D-reprojection accuracy at several pixel thresholds.

    Appends to the module-level ``testing_iters``, ``testing_errors_pixel``
    and ``testing_accuracies`` lists; *niter* tags the entry.
    NOTE(review): *cfgfile* is accepted but never used here — the already
    loaded global model is evaluated instead.
    """
    def truths_length(truths):
        # Label rows are padded; a row whose first coordinate is 0 terminates
        # the list (at most 50 objects per image).
        # NOTE(review): returns None when all 50 rows are occupied, which
        # would crash the caller's range() — confirm 50 is a safe upper bound.
        for i in range(50):
            if truths[i][1] == 0:
                return i

    # Parse configuration files
    options = read_data_cfg(datacfg)
    valid_images = options['valid']   # path to list of test image files
    meshname = options['mesh']        # object mesh (.ply)
    backupdir = options['backup']
    name = options['name']            # object class name, used as dataset filter
    prefix = 'results'
    # Read object model information, get 3D bounding box corners
    mesh = MeshPly(meshname)
    # Homogeneous vertex coordinates: (4, num_vertices).
    vertices = np.c_[np.array(mesh.vertices), np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    # Read intrinsic camera parameters
    internal_calibration = get_camera_intrinsic()

    # Get validation file names
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    # Set the module in evaluation mode (the global model is reused as-is).
    model.eval()

    # Get the parser for the test dataset
    valid_dataset = dataset_multi.listDataset(valid_images, shape=(model.module.width, model.module.height),
                                              shuffle=False,
                                              objclass=name,
                                              transform=transforms.Compose([
                                                  transforms.ToTensor(),
                                              ]))
    valid_batchsize = 1

    # Specify the number of workers for multiple processing, get the dataloader for the test dataset
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=valid_batchsize, shuffle=False, **kwargs)

    # Parameters
    num_classes = model.module.num_classes
    anchors = model.module.anchors
    num_anchors = model.module.num_anchors
    testing_error_pixel = 0.0   # running sum of mean reprojection error (px)
    testing_samples = 0.0       # number of evaluated ground-truth objects
    errs_2d = []                # per-object mean reprojection errors

    logging(" Number of test samples: %d" % len(test_loader.dataset))
    # Iterate through test examples
    for batch_idx, (data, target) in enumerate(test_loader):
        t1 = time.time()

        # Pass the data to GPU
        if use_cuda:
            data = data.cuda()
            target = target.cuda()

        # Wrap tensors in Variable class, set volatile=True for inference mode and to use minimal memory during inference
        data = Variable(data, volatile=True)
        t2 = time.time()

        # Forward pass
        output = model(data).data
        t3 = time.time()

        # Using confidence threshold, eliminate low-confidence predictions.
        # NOTE(review): the class filter uses the first label of the first
        # image only — valid because valid_batchsize is 1.
        trgt = target[0].view(-1, 21)
        all_boxes = get_corresponding_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors, int(trgt[0][0]), only_objectness=0)
        t4 = time.time()

        # Iterate through all batch elements
        for i in range(output.size(0)):

            # For each image, get all the predictions
            boxes = all_boxes[i]

            # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)
            truths = target[i].view(-1, 21)

            # Get how many objects are present in the scene
            num_gts = truths_length(truths)

            # Iterate through each ground-truth object
            for k in range(num_gts):
                # 18 normalized corner coords + 2 flags + class id.
                box_gt = [truths[k][1], truths[k][2], truths[k][3], truths[k][4], truths[k][5], truths[k][6],
                          truths[k][7], truths[k][8], truths[k][9], truths[k][10], truths[k][11], truths[k][12],
                          truths[k][13], truths[k][14], truths[k][15], truths[k][16], truths[k][17], truths[k][18], 1.0, 1.0, truths[k][0]]
                best_conf_est = -1

                # If the prediction has the highest confidence, choose it as our prediction
                for j in range(len(boxes)):
                    if (boxes[j][18] > best_conf_est) and (boxes[j][20] == int(truths[k][0])):
                        best_conf_est = boxes[j][18]
                        box_pr = boxes[j]
                        bb2d_gt = get_2d_bb(box_gt[:18], output.size(3))
                        bb2d_pr = get_2d_bb(box_pr[:18], output.size(3))
                        iou = bbox_iou(bb2d_gt, bb2d_pr)
                        match = corner_confidence9(box_gt[:18], torch.FloatTensor(boxes[j][:18]))

                # Denormalize the corner predictions (grid-relative -> pixels).
                # NOTE(review): if no box of the right class exists, box_pr is
                # unbound (or stale from the previous object) — confirm the
                # detector always emits at least one box per class.
                corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]), dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * im_width
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * im_height
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * im_width
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * im_height
                corners2D_gt_corrected = fix_corner_order(corners2D_gt) # Fix the order of the corners in OCCLUSION

                # Compute [R|t] by pnp (centroid prepended as the 9th point).
                objpoints3D = np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32')
                K = np.array(internal_calibration, dtype='float32')
                R_gt, t_gt = pnp(objpoints3D, corners2D_gt_corrected, K)
                R_pr, t_pr = pnp(objpoints3D, corners2D_pr, K)

                # Compute pixel error: mean distance between the projections
                # of all mesh vertices under the GT and predicted poses.
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt, internal_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr, internal_calibration)
                proj_corners_gt = np.transpose(compute_projection(corners3D, Rt_gt, internal_calibration))
                proj_corners_pr = np.transpose(compute_projection(corners3D, Rt_pr, internal_calibration))
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist = np.mean(norm)
                errs_2d.append(pixel_dist)

                # Sum errors
                testing_error_pixel += pixel_dist
                testing_samples += 1

        t5 = time.time()

    # Compute 2D reprojection score at each pixel threshold.
    for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:
        acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d)+eps)
        logging('   Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))

    # Per-stage timing of the *last* batch only.
    if True:
        logging('-----------------------------------')
        logging(' tensor to cuda : %f' % (t2 - t1))
        logging(' predict : %f' % (t3 - t2))
        logging('get_region_boxes : %f' % (t4 - t3))
        logging(' eval : %f' % (t5 - t4))
        logging(' total : %f' % (t5 - t1))
        logging('-----------------------------------')

    # Register losses and errors for saving later on.
    # NOTE(review): ``acc`` here is the value from the last loop iteration,
    # i.e. the 50 px threshold accuracy — confirm that is the intended metric.
    testing_iters.append(niter)
    testing_errors_pixel.append(testing_error_pixel/(float(testing_samples)+eps))
    testing_accuracies.append(acc)
|
||||
|
||||
def test(niter):
    """Evaluate the current model on every OCCLUSION object in turn.

    Runs ``eval`` once per object class using its ``cfg/<name>_occlusion.data``
    file and the shared multi-object network config; *niter* tags the logged
    results. Holepuncher is currently excluded (kept commented out below).
    """
    cfgfile = 'cfg/yolo-pose-multi.cfg'
    for objname in ('ape', 'can', 'cat', 'duck', 'driller', 'glue'):
        logging("Testing %s..." % objname)
        eval(niter, 'cfg/%s_occlusion.data' % objname, cfgfile)
    # datacfg = 'cfg/holepuncher_occlusion.data'
    # logging("Testing holepuncher...")
    # eval(niter, datacfg, cfgfile)
|
||||
|
||||
if __name__ == "__main__":

    # Training settings: <datacfg> <netcfg> <initial weights>
    datacfg = sys.argv[1]
    cfgfile = sys.argv[2]
    weightfile = sys.argv[3]

    # Parse configuration files
    data_options = read_data_cfg(datacfg)
    net_options = parse_cfg(cfgfile)[0]
    trainlist = data_options['train']
    nsamples = file_lines(trainlist)
    gpus = data_options['gpus']  # e.g. 0,1,2,3
    gpus = '0'  # NOTE: hard-coded single-GPU override of the config value
    num_workers = int(data_options['num_workers'])
    backupdir = data_options['backup']
    if not os.path.exists(backupdir):
        makedirs(backupdir)
    batch_size = int(net_options['batch'])
    max_batches = int(net_options['max_batches'])
    learning_rate = float(net_options['learning_rate'])
    momentum = float(net_options['momentum'])
    decay = float(net_options['decay'])
    steps = [float(step) for step in net_options['steps'].split(',')]
    scales = [float(scale) for scale in net_options['scales'].split(',')]
    # Background images used for training-time augmentation.
    bg_file_names = get_all_files('../VOCdevkit/VOC2012/JPEGImages')

    # Train parameters
    max_epochs = 700  # max_batches*batch_size/nsamples+1
    use_cuda = True
    seed = int(time.time())
    eps = 1e-5
    save_interval = 10  # epoches
    dot_interval = 70  # batches
    best_acc = -1

    # Test parameters
    conf_thresh = 0.05
    nms_thresh = 0.4
    match_thresh = 0.5
    iou_thresh = 0.5
    im_width = 640
    im_height = 480

    # Specify which gpus to use
    torch.manual_seed(seed)
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        torch.cuda.manual_seed(seed)

    # Specifiy the model and the loss
    model = Darknet(cfgfile)
    region_loss = model.loss

    # Model settings
    # model.load_weights(weightfile)
    model.load_weights_until_last(weightfile)
    model.print_network()
    model.seen = 0
    region_loss.iter = model.iter
    region_loss.seen = model.seen
    # FIX: use floor division — under Python 3 plain '/' yields floats, and a
    # float init_epoch makes range(init_epoch, max_epochs) raise TypeError.
    processed_batches = model.seen // batch_size
    init_width = model.width
    init_height = model.height
    init_epoch = model.seen // nsamples

    # Variables to save for the training/testing curves
    training_iters = []
    training_losses = []
    testing_iters = []
    testing_errors_pixel = []
    testing_accuracies = []

    # Specify the number of workers
    kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}

    # Pass the model to GPU
    if use_cuda:
        # model = model.cuda()
        model = torch.nn.DataParallel(model, device_ids=[0]).cuda() # Multiple GPU parallelism

    # Get the optimizer.
    # NOTE: the per-group 'params' list below is built but never handed to the
    # optimizer (optim.SGD uses model.parameters()); kept for behavior parity.
    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        if key.find('.bn') >= 0 or key.find('.bias') >= 0:
            params += [{'params': [value], 'weight_decay': 0.0}]
        else:
            params += [{'params': [value], 'weight_decay': decay*batch_size}]
    optimizer = optim.SGD(model.parameters(), lr=learning_rate/batch_size, momentum=momentum, dampening=0, weight_decay=decay*batch_size)
    # optimizer = optim.Adam(model.parameters(), lr=0.001) # Adam optimization

    evaluate = False
    if evaluate:
        logging('evaluating ...')
        # FIX: test() takes a single argument (the iteration tag); the old
        # call test(0, 0) raised TypeError whenever this path was taken.
        test(0)
    else:
        for epoch in range(init_epoch, max_epochs):
            # TRAIN
            niter = train(epoch)
            # TEST and SAVE every 20 epochs (skip epoch 0).
            # FIX: compare ints with '!=', not the identity test 'is not'.
            if (epoch % 20 == 0) and (epoch != 0):
                test(niter)
                logging('save training stats to %s/costs.npz' % (backupdir))
                np.savez(os.path.join(backupdir, "costs.npz"),
                         training_iters=training_iters,
                         training_losses=training_losses,
                         testing_iters=testing_iters,
                         testing_accuracies=testing_accuracies,
                         testing_errors_pixel=testing_errors_pixel)
                # Keep the weights whenever the recent mean accuracy improves.
                if (np.mean(testing_accuracies[-5:]) > best_acc):
                    best_acc = np.mean(testing_accuracies[-5:])
                    logging('best model so far!')
                    logging('save weights to %s/model.weights' % (backupdir))
                    model.module.save_weights('%s/model.weights' % (backupdir))
                    shutil.copy2('%s/model.weights' % (backupdir), '%s/model_backup.weights' % (backupdir))
|
|
@ -0,0 +1,343 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%matplotlib inline\n",
|
||||
"import os\n",
|
||||
"os.sys.path.append('..')\n",
|
||||
"os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"1\"\n",
|
||||
"import torch\n",
|
||||
"from torch.autograd import Variable\n",
|
||||
"from torchvision import datasets, transforms\n",
|
||||
"from scipy.misc import imsave\n",
|
||||
"import scipy.io\n",
|
||||
"import warnings\n",
|
||||
"import sys\n",
|
||||
"warnings.filterwarnings(\"ignore\")\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import scipy.misc\n",
|
||||
"\n",
|
||||
"from darknet_multi import Darknet\n",
|
||||
"from utils import *\n",
|
||||
"import dataset_multi\n",
|
||||
"from MeshPly import MeshPly"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2018-05-06 14:09:50 Testing ape...\n",
|
||||
"2018-05-06 14:10:15 Acc using 5 px 2D Projection = 7.01%\n",
|
||||
"2018-05-06 14:10:15 Acc using 10 px 2D Projection = 40.43%\n",
|
||||
"2018-05-06 14:10:15 Acc using 15 px 2D Projection = 59.83%\n",
|
||||
"2018-05-06 14:10:15 Acc using 20 px 2D Projection = 68.55%\n",
|
||||
"2018-05-06 14:10:15 Acc using 25 px 2D Projection = 72.05%\n",
|
||||
"2018-05-06 14:10:15 Acc using 30 px 2D Projection = 73.68%\n",
|
||||
"2018-05-06 14:10:15 Acc using 35 px 2D Projection = 74.53%\n",
|
||||
"2018-05-06 14:10:15 Acc using 40 px 2D Projection = 75.13%\n",
|
||||
"2018-05-06 14:10:15 Acc using 45 px 2D Projection = 75.73%\n",
|
||||
"2018-05-06 14:10:15 Acc using 50 px 2D Projection = 76.50%\n",
|
||||
"2018-05-06 14:10:18 Testing can...\n",
|
||||
"2018-05-06 14:10:47 Acc using 5 px 2D Projection = 11.18%\n",
|
||||
"2018-05-06 14:10:47 Acc using 10 px 2D Projection = 57.83%\n",
|
||||
"2018-05-06 14:10:47 Acc using 15 px 2D Projection = 79.95%\n",
|
||||
"2018-05-06 14:10:47 Acc using 20 px 2D Projection = 85.75%\n",
|
||||
"2018-05-06 14:10:47 Acc using 25 px 2D Projection = 88.73%\n",
|
||||
"2018-05-06 14:10:47 Acc using 30 px 2D Projection = 90.39%\n",
|
||||
"2018-05-06 14:10:47 Acc using 35 px 2D Projection = 91.80%\n",
|
||||
"2018-05-06 14:10:47 Acc using 40 px 2D Projection = 93.21%\n",
|
||||
"2018-05-06 14:10:47 Acc using 45 px 2D Projection = 93.62%\n",
|
||||
"2018-05-06 14:10:47 Acc using 50 px 2D Projection = 93.79%\n",
|
||||
"2018-05-06 14:10:50 Testing cat...\n",
|
||||
"2018-05-06 14:11:16 Acc using 5 px 2D Projection = 3.62%\n",
|
||||
"2018-05-06 14:11:16 Acc using 10 px 2D Projection = 23.25%\n",
|
||||
"2018-05-06 14:11:16 Acc using 15 px 2D Projection = 39.51%\n",
|
||||
"2018-05-06 14:11:16 Acc using 20 px 2D Projection = 49.45%\n",
|
||||
"2018-05-06 14:11:16 Acc using 25 px 2D Projection = 54.76%\n",
|
||||
"2018-05-06 14:11:16 Acc using 30 px 2D Projection = 57.96%\n",
|
||||
"2018-05-06 14:11:16 Acc using 35 px 2D Projection = 59.56%\n",
|
||||
"2018-05-06 14:11:16 Acc using 40 px 2D Projection = 60.99%\n",
|
||||
"2018-05-06 14:11:16 Acc using 45 px 2D Projection = 62.51%\n",
|
||||
"2018-05-06 14:11:16 Acc using 50 px 2D Projection = 63.27%\n",
|
||||
"2018-05-06 14:11:19 Testing duck...\n",
|
||||
"2018-05-06 14:11:42 Acc using 5 px 2D Projection = 5.07%\n",
|
||||
"2018-05-06 14:11:42 Acc using 10 px 2D Projection = 18.20%\n",
|
||||
"2018-05-06 14:11:42 Acc using 15 px 2D Projection = 30.88%\n",
|
||||
"2018-05-06 14:11:42 Acc using 20 px 2D Projection = 55.12%\n",
|
||||
"2018-05-06 14:11:42 Acc using 25 px 2D Projection = 75.15%\n",
|
||||
"2018-05-06 14:11:42 Acc using 30 px 2D Projection = 81.45%\n",
|
||||
"2018-05-06 14:11:42 Acc using 35 px 2D Projection = 83.20%\n",
|
||||
"2018-05-06 14:11:42 Acc using 40 px 2D Projection = 83.64%\n",
|
||||
"2018-05-06 14:11:42 Acc using 45 px 2D Projection = 83.90%\n",
|
||||
"2018-05-06 14:11:42 Acc using 50 px 2D Projection = 84.16%\n",
|
||||
"2018-05-06 14:11:45 Testing driller...\n",
|
||||
"2018-05-06 14:12:10 Acc using 5 px 2D Projection = 1.40%\n",
|
||||
"2018-05-06 14:12:10 Acc using 10 px 2D Projection = 17.38%\n",
|
||||
"2018-05-06 14:12:10 Acc using 15 px 2D Projection = 39.87%\n",
|
||||
"2018-05-06 14:12:10 Acc using 20 px 2D Projection = 62.93%\n",
|
||||
"2018-05-06 14:12:10 Acc using 25 px 2D Projection = 80.64%\n",
|
||||
"2018-05-06 14:12:10 Acc using 30 px 2D Projection = 89.87%\n",
|
||||
"2018-05-06 14:12:10 Acc using 35 px 2D Projection = 94.89%\n",
|
||||
"2018-05-06 14:12:10 Acc using 40 px 2D Projection = 95.88%\n",
|
||||
"2018-05-06 14:12:10 Acc using 45 px 2D Projection = 96.54%\n",
|
||||
"2018-05-06 14:12:10 Acc using 50 px 2D Projection = 96.87%\n",
|
||||
"2018-05-06 14:12:13 Testing glue...\n",
|
||||
"2018-05-06 14:12:31 Acc using 5 px 2D Projection = 6.53%\n",
|
||||
"2018-05-06 14:12:31 Acc using 10 px 2D Projection = 26.91%\n",
|
||||
"2018-05-06 14:12:31 Acc using 15 px 2D Projection = 39.65%\n",
|
||||
"2018-05-06 14:12:31 Acc using 20 px 2D Projection = 46.18%\n",
|
||||
"2018-05-06 14:12:31 Acc using 25 px 2D Projection = 49.50%\n",
|
||||
"2018-05-06 14:12:31 Acc using 30 px 2D Projection = 51.83%\n",
|
||||
"2018-05-06 14:12:31 Acc using 35 px 2D Projection = 53.05%\n",
|
||||
"2018-05-06 14:12:31 Acc using 40 px 2D Projection = 53.16%\n",
|
||||
"2018-05-06 14:12:31 Acc using 45 px 2D Projection = 53.93%\n",
|
||||
"2018-05-06 14:12:31 Acc using 50 px 2D Projection = 54.71%\n",
|
||||
"2018-05-06 14:12:45 Testing holepuncher...\n",
|
||||
"2018-05-06 14:19:31 Acc using 5 px 2D Projection = 8.26%\n",
|
||||
"2018-05-06 14:19:31 Acc using 10 px 2D Projection = 39.50%\n",
|
||||
"2018-05-06 14:19:31 Acc using 15 px 2D Projection = 53.31%\n",
|
||||
"2018-05-06 14:19:31 Acc using 20 px 2D Projection = 62.56%\n",
|
||||
"2018-05-06 14:19:31 Acc using 25 px 2D Projection = 68.02%\n",
|
||||
"2018-05-06 14:19:31 Acc using 30 px 2D Projection = 74.71%\n",
|
||||
"2018-05-06 14:19:31 Acc using 35 px 2D Projection = 80.74%\n",
|
||||
"2018-05-06 14:19:31 Acc using 40 px 2D Projection = 85.62%\n",
|
||||
"2018-05-06 14:19:31 Acc using 45 px 2D Projection = 89.59%\n",
|
||||
"2018-05-06 14:19:31 Acc using 50 px 2D Projection = 91.49%\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def valid(datacfg, cfgfile, weightfile, conf_th):\n",
|
||||
" def truths_length(truths):\n",
|
||||
" for i in range(50):\n",
|
||||
" if truths[i][1] == 0:\n",
|
||||
" return i\n",
|
||||
"\n",
|
||||
" # Parse configuration files\n",
|
||||
" options = read_data_cfg(datacfg)\n",
|
||||
" valid_images = options['valid']\n",
|
||||
" meshname = options['mesh']\n",
|
||||
" backupdir = options['backup']\n",
|
||||
" name = options['name']\n",
|
||||
" prefix = 'results'\n",
|
||||
" # Read object model information, get 3D bounding box corners\n",
|
||||
" mesh = MeshPly(meshname)\n",
|
||||
" vertices = np.c_[np.array(mesh.vertices), np.ones((len(mesh.vertices), 1))].transpose()\n",
|
||||
" corners3D = get_3D_corners(vertices)\n",
|
||||
" # Read intrinsic camera parameters\n",
|
||||
" internal_calibration = get_camera_intrinsic()\n",
|
||||
"\n",
|
||||
" # Get validation file names\n",
|
||||
" with open(valid_images) as fp:\n",
|
||||
" tmp_files = fp.readlines()\n",
|
||||
" valid_files = [item.rstrip() for item in tmp_files]\n",
|
||||
" \n",
|
||||
" # Specicy model, load pretrained weights, pass to GPU and set the module in evaluation mode\n",
|
||||
" model = Darknet(cfgfile)\n",
|
||||
" model.load_weights(weightfile)\n",
|
||||
" model.cuda()\n",
|
||||
" model.eval()\n",
|
||||
"\n",
|
||||
" # Get the parser for the test dataset\n",
|
||||
" valid_dataset = dataset_multi.listDataset(valid_images, shape=(model.width, model.height),\n",
|
||||
" shuffle=False,\n",
|
||||
" objclass=name,\n",
|
||||
" transform=transforms.Compose([\n",
|
||||
" transforms.ToTensor(),\n",
|
||||
" ]))\n",
|
||||
" valid_batchsize = 1\n",
|
||||
"\n",
|
||||
" # Specify the number of workers for multiple processing, get the dataloader for the test dataset\n",
|
||||
" kwargs = {'num_workers': 4, 'pin_memory': True}\n",
|
||||
" test_loader = torch.utils.data.DataLoader(\n",
|
||||
" valid_dataset, batch_size=valid_batchsize, shuffle=False, **kwargs) \n",
|
||||
"\n",
|
||||
" # Parameters\n",
|
||||
" visualize = False\n",
|
||||
" use_cuda = True\n",
|
||||
" num_classes = 13\n",
|
||||
" anchors = [1.4820, 2.2412, 2.0501, 3.1265, 2.3946, 4.6891, 3.1018, 3.9910, 3.4879, 5.8851]\n",
|
||||
" num_anchors = 5\n",
|
||||
" eps = 1e-5\n",
|
||||
" conf_thresh = conf_th\n",
|
||||
" iou_thresh = 0.5\n",
|
||||
"\n",
|
||||
" # Parameters to save\n",
|
||||
" errs_2d = []\n",
|
||||
" edges = [[1, 2], [1, 3], [1, 5], [2, 4], [2, 6], [3, 4], [3, 7], [4, 8], [5, 6], [5, 7], [6, 8], [7, 8]]\n",
|
||||
" edges_corners = [[0, 1], [0, 2], [0, 4], [1, 3], [1, 5], [2, 3], [2, 6], [3, 7], [4, 5], [4, 6], [5, 7], [6, 7]]\n",
|
||||
"\n",
|
||||
" # Iterate through test batches (Batch size for test data is 1)\n",
|
||||
" count = 0\n",
|
||||
" logging('Testing {}...'.format(name))\n",
|
||||
" for batch_idx, (data, target) in enumerate(test_loader):\n",
|
||||
" \n",
|
||||
" # Images\n",
|
||||
" img = data[0, :, :, :]\n",
|
||||
" img = img.numpy().squeeze()\n",
|
||||
" img = np.transpose(img, (1, 2, 0))\n",
|
||||
" \n",
|
||||
" t1 = time.time()\n",
|
||||
" # Pass data to GPU\n",
|
||||
" if use_cuda:\n",
|
||||
" data = data.cuda()\n",
|
||||
" target = target.cuda()\n",
|
||||
" \n",
|
||||
" # Wrap tensors in Variable class, set volatile=True for inference mode and to use minimal memory during inference\n",
|
||||
" data = Variable(data, volatile=True)\n",
|
||||
" t2 = time.time()\n",
|
||||
" \n",
|
||||
" # Forward pass\n",
|
||||
" output = model(data).data \n",
|
||||
" t3 = time.time()\n",
|
||||
" \n",
|
||||
" # Using confidence threshold, eliminate low-confidence predictions\n",
|
||||
" trgt = target[0].view(-1, 21)\n",
|
||||
" all_boxes = get_corresponding_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors, int(trgt[0][0]), only_objectness=0) \n",
|
||||
" t4 = time.time()\n",
|
||||
" \n",
|
||||
" # Iterate through all images in the batch\n",
|
||||
" for i in range(output.size(0)):\n",
|
||||
" \n",
|
||||
" # For each image, get all the predictions\n",
|
||||
" boxes = all_boxes[i]\n",
|
||||
" \n",
|
||||
" # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)\n",
|
||||
" truths = target[i].view(-1, 21)\n",
|
||||
" \n",
|
||||
" # Get how many object are present in the scene\n",
|
||||
" num_gts = truths_length(truths)\n",
|
||||
"\n",
|
||||
" # Iterate through each ground-truth object\n",
|
||||
" for k in range(num_gts):\n",
|
||||
" box_gt = [truths[k][1], truths[k][2], truths[k][3], truths[k][4], truths[k][5], truths[k][6], \n",
|
||||
" truths[k][7], truths[k][8], truths[k][9], truths[k][10], truths[k][11], truths[k][12], \n",
|
||||
" truths[k][13], truths[k][14], truths[k][15], truths[k][16], truths[k][17], truths[k][18], 1.0, 1.0, truths[k][0]]\n",
|
||||
" best_conf_est = -1\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" # If the prediction has the highest confidence, choose it as our prediction\n",
|
||||
" for j in range(len(boxes)):\n",
|
||||
" if (boxes[j][18] > best_conf_est) and (boxes[j][20] == int(truths[k][0])):\n",
|
||||
" best_conf_est = boxes[j][18]\n",
|
||||
" box_pr = boxes[j]\n",
|
||||
" bb2d_gt = get_2d_bb(box_gt[:18], output.size(3))\n",
|
||||
" bb2d_pr = get_2d_bb(box_pr[:18], output.size(3))\n",
|
||||
" iou = bbox_iou(bb2d_gt, bb2d_pr)\n",
|
||||
" match = corner_confidence9(box_gt[:18], torch.FloatTensor(boxes[j][:18]))\n",
|
||||
" \n",
|
||||
" # Denormalize the corner predictions \n",
|
||||
" corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]), dtype='float32')\n",
|
||||
" corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')\n",
|
||||
" corners2D_gt[:, 0] = corners2D_gt[:, 0] * 640\n",
|
||||
" corners2D_gt[:, 1] = corners2D_gt[:, 1] * 480 \n",
|
||||
" corners2D_pr[:, 0] = corners2D_pr[:, 0] * 640\n",
|
||||
" corners2D_pr[:, 1] = corners2D_pr[:, 1] * 480\n",
|
||||
" corners2D_gt_corrected = fix_corner_order(corners2D_gt) # Fix the order of the corners in OCCLUSION\n",
|
||||
" \n",
|
||||
" # Compute [R|t] by pnp\n",
|
||||
" objpoints3D = np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32')\n",
|
||||
" K = np.array(internal_calibration, dtype='float32')\n",
|
||||
" R_gt, t_gt = pnp(objpoints3D, corners2D_gt_corrected, K)\n",
|
||||
" R_pr, t_pr = pnp(objpoints3D, corners2D_pr, K)\n",
|
||||
" \n",
|
||||
" # Compute pixel error\n",
|
||||
" Rt_gt = np.concatenate((R_gt, t_gt), axis=1)\n",
|
||||
" Rt_pr = np.concatenate((R_pr, t_pr), axis=1)\n",
|
||||
" proj_2d_gt = compute_projection(vertices, Rt_gt, internal_calibration) \n",
|
||||
" proj_2d_pred = compute_projection(vertices, Rt_pr, internal_calibration) \n",
|
||||
" proj_corners_gt = np.transpose(compute_projection(corners3D, Rt_gt, internal_calibration)) \n",
|
||||
" proj_corners_pr = np.transpose(compute_projection(corners3D, Rt_pr, internal_calibration)) \n",
|
||||
" norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)\n",
|
||||
" pixel_dist = np.mean(norm)\n",
|
||||
" errs_2d.append(pixel_dist)\n",
|
||||
"\n",
|
||||
" \n",
|
||||
" if visualize:\n",
|
||||
" # Visualize\n",
|
||||
" plt.xlim((0, 640))\n",
|
||||
" plt.ylim((0, 480))\n",
|
||||
" plt.imshow(scipy.misc.imresize(img, (480, 640)))\n",
|
||||
" # Projections\n",
|
||||
" for edge in edges_corners:\n",
|
||||
" plt.plot(proj_corners_gt[edge, 0], proj_corners_gt[edge, 1], color='g', linewidth=3.0)\n",
|
||||
" plt.plot(proj_corners_pr[edge, 0], proj_corners_pr[edge, 1], color='b', linewidth=3.0)\n",
|
||||
" plt.gca().invert_yaxis()\n",
|
||||
" plt.show()\n",
|
||||
"\n",
|
||||
" t5 = time.time()\n",
|
||||
"\n",
|
||||
" # Compute 2D projection score\n",
|
||||
" for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:\n",
|
||||
" acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d)+eps)\n",
|
||||
" # Print test statistics\n",
|
||||
" logging(' Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))\n",
|
||||
"\n",
|
||||
"conf_th = 0.05\n",
|
||||
"cfgfile = 'cfg/yolo-pose-multi.cfg'\n",
|
||||
"weightfile = 'backup_multi/model_backup2.weights'\n",
|
||||
"datacfg = 'cfg/ape_occlusion.data'\n",
|
||||
"valid(datacfg, cfgfile, weightfile, conf_th)\n",
|
||||
"datacfg = 'cfg/can_occlusion.data'\n",
|
||||
"valid(datacfg, cfgfile, weightfile, conf_th)\n",
|
||||
"datacfg = 'cfg/cat_occlusion.data'\n",
|
||||
"valid(datacfg, cfgfile, weightfile, conf_th)\n",
|
||||
"datacfg = 'cfg/duck_occlusion.data'\n",
|
||||
"valid(datacfg, cfgfile, weightfile, conf_th)\n",
|
||||
"datacfg = 'cfg/driller_occlusion.data'\n",
|
||||
"valid(datacfg, cfgfile, weightfile, conf_th)\n",
|
||||
"datacfg = 'cfg/glue_occlusion.data'\n",
|
||||
"valid(datacfg, cfgfile, weightfile, conf_th)\n",
|
||||
"datacfg = 'cfg/holepuncher_occlusion.data'\n",
|
||||
"valid(datacfg, cfgfile, weightfile, conf_th)\n",
|
||||
"\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 2",
|
||||
"language": "python",
|
||||
"name": "python2"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
|
@ -0,0 +1,183 @@
|
|||
import os
|
||||
os.sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
import torch
|
||||
from torch.autograd import Variable
|
||||
from torchvision import datasets, transforms
|
||||
import matplotlib.pyplot as plt
|
||||
import scipy.misc
|
||||
import warnings
|
||||
warnings.filterwarnings("ignore")
|
||||
|
||||
from darknet_multi import Darknet
|
||||
from utils import *
|
||||
import dataset_multi
|
||||
from MeshPly import MeshPly
|
||||
|
||||
def valid(datacfg, cfgfile, weightfile, conf_th):
    """Evaluate a multi-object pose network on one OCCLUSION object class.

    Loads the object mesh and test-image list named in *datacfg*, runs the
    network defined by *cfgfile*/*weightfile* over every test image,
    recovers a 6D pose for each ground-truth object via PnP on the 9
    predicted 2D corner points, and logs the 2D-projection accuracy at
    pixel thresholds 5..50.

    Args:
        datacfg:    path to a .data config with 'valid', 'mesh', 'name' keys.
        cfgfile:    darknet network definition file.
        weightfile: trained weights for that network.
        conf_th:    confidence threshold used when extracting candidate boxes.
    """
    def truths_length(truths):
        # Labels are zero-padded to 50 rows; the first row whose second
        # field is 0 marks the end of the real ground truths.
        for i in range(50):
            if truths[i][1] == 0:
                return i
        # ROBUSTNESS: original fell off the loop and returned None when
        # all 50 slots were used, crashing range(num_gts) below.
        return 50

    # Parse configuration files
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    meshname = options['mesh']
    name = options['name']

    # Read object model information, get 3D bounding box corners
    # (vertices are kept in homogeneous coordinates for projection).
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices), np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)

    # Read intrinsic camera parameters
    internal_calibration = get_camera_intrinsic()

    # Specify model, load pretrained weights, pass to GPU and set the
    # module in evaluation mode
    model = Darknet(cfgfile)
    model.load_weights(weightfile)
    model.cuda()
    model.eval()

    # Get the dataloader for the test dataset (batch size 1)
    valid_dataset = dataset_multi.listDataset(valid_images, shape=(model.width, model.height),
                                              shuffle=False,
                                              objclass=name,
                                              transform=transforms.Compose([
                                                  transforms.ToTensor(),
                                              ]))
    valid_batchsize = 1
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=valid_batchsize, shuffle=False, **kwargs)

    # Parameters
    use_cuda = True
    num_classes = 13
    anchors = [1.4820, 2.2412, 2.0501, 3.1265, 2.3946, 4.6891, 3.1018, 3.9910, 3.4879, 5.8851]
    num_anchors = 5
    eps = 1e-5  # guards the accuracy division when no samples were scored
    conf_thresh = conf_th

    # Mean per-object reprojection errors (pixels), accumulated over all images
    errs_2d = []

    # Iterate through test batches (batch size for test data is 1)
    logging('Testing {}...'.format(name))
    for batch_idx, (data, target) in enumerate(test_loader):
        # Pass data to GPU
        if use_cuda:
            data = data.cuda()
            target = target.cuda()

        # volatile=True: inference mode / minimal memory (pre-0.4 PyTorch API)
        data = Variable(data, volatile=True)

        # Forward pass
        output = model(data).data

        # Using confidence threshold, eliminate low-confidence predictions
        trgt = target[0].view(-1, 21)
        all_boxes = get_corresponding_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors, int(trgt[0][0]), only_objectness=0)

        # Iterate through all images in the batch
        for i in range(output.size(0)):
            # For each image, get all the predictions
            boxes = all_boxes[i]

            # Ground truths: one row of 21 values per object -- class id in
            # column 0, then 9 normalized corner x/y pairs (columns 1..18).
            truths = target[i].view(-1, 21)
            num_gts = truths_length(truths)

            # Iterate through each ground-truth object
            for k in range(num_gts):
                box_gt = [truths[k][1], truths[k][2], truths[k][3], truths[k][4], truths[k][5], truths[k][6],
                          truths[k][7], truths[k][8], truths[k][9], truths[k][10], truths[k][11], truths[k][12],
                          truths[k][13], truths[k][14], truths[k][15], truths[k][16], truths[k][17], truths[k][18], 1.0, 1.0, truths[k][0]]

                # Pick the highest-confidence prediction of the matching class
                best_conf_est = -1
                box_pr = None
                for j in range(len(boxes)):
                    if (boxes[j][18] > best_conf_est) and (boxes[j][20] == int(truths[k][0])):
                        best_conf_est = boxes[j][18]
                        box_pr = boxes[j]
                # BUGFIX: if no box of the right class survived the threshold,
                # the original either crashed with NameError or silently
                # reused the previous object's box_pr.  Skip this ground
                # truth instead.
                if box_pr is None:
                    continue

                # Denormalize the corner predictions to 640x480 pixels
                corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]), dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * 640
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * 480
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * 640
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * 480
                corners2D_gt_corrected = fix_corner_order(corners2D_gt)  # Fix the order of the corners in OCCLUSION

                # Compute [R|t] by PnP (centroid prepended to the 8 box corners)
                objpoints3D = np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32')
                K = np.array(internal_calibration, dtype='float32')
                R_gt, t_gt = pnp(objpoints3D, corners2D_gt_corrected, K)
                R_pr, t_pr = pnp(objpoints3D, corners2D_pr, K)

                # Compute mean pixel reprojection error over all mesh vertices
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt, internal_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr, internal_calibration)
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                errs_2d.append(np.mean(norm))

    # Compute 2D projection score at increasing pixel thresholds
    for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:
        acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d)+eps)
        # Print test statistics
        logging('   Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))
||||
|
||||
# Script entry point: evaluate every OCCLUSION object class with the
# supplied network definition and weights.
if __name__ == '__main__' and __package__ is None:
    import sys
    if len(sys.argv) == 3:
        conf_th = 0.05
        cfgfile = sys.argv[1]
        weightfile = sys.argv[2]
        # Same classes, same order as before -- just driven by a loop.
        for datacfg in ('cfg/ape_occlusion.data',
                        'cfg/can_occlusion.data',
                        'cfg/cat_occlusion.data',
                        'cfg/duck_occlusion.data',
                        'cfg/glue_occlusion.data',
                        'cfg/holepuncher_occlusion.data'):
            valid(datacfg, cfgfile, weightfile, conf_th)
    else:
        print('Usage:')
        print(' python valid.py cfgfile weightfile')
|
Загрузка…
Ссылка в новой задаче