created repo and uploaded files
This commit is contained in:
Sudipta N. Sinha 2018-06-30 11:17:29 -07:00
Parent 1c1501738e
Commit 434526bd11
43 changed files with 4052 additions and 0 deletions

Binary data
cfg/.DS_Store vendored Normal file

Binary file not shown.

7
cfg/ape.data Normal file
View File

@@ -0,0 +1,7 @@
train = LINEMOD/ape/train.txt
valid = LINEMOD/ape/test.txt
backup = backup/ape
mesh = LINEMOD/ape/ape.ply
tr_range = LINEMOD/ape/training_range.txt
name = ape
diam = 0.103

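Each *.data file in this commit is a flat "key = value" manifest: train/valid point to image-list files, mesh to the object's PLY model, tr_range to the training-split indices, and diam to the object diameter in meters (the quantity typically used for the diameter-based pose accuracy threshold). A minimal parser sketch in Python — the helper name read_data_cfg is an assumption for illustration; the repo's own loader may differ:

    def read_data_cfg(datacfg):
        # parse a darknet-style "key = value" file into a dict
        options = dict()
        with open(datacfg) as fp:
            for line in fp:
                line = line.strip()
                if line == '' or line.startswith('#'):
                    continue
                key, value = line.split('=', 1)
                options[key.strip()] = value.strip()
        return options

    # usage: opts = read_data_cfg('cfg/ape.data'); float(opts['diam'])  # -> 0.103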
7
cfg/benchvise.data Normal file
View File

@@ -0,0 +1,7 @@
train = LINEMOD/benchvise/train.txt
valid = LINEMOD/benchvise/test.txt
backup = backup/benchvise
mesh = LINEMOD/benchvise/benchvise.ply
tr_range = LINEMOD/benchvise/training_range.txt
name = benchvise
diam = 0.286908

7
cfg/cam.data Normal file
View File

@@ -0,0 +1,7 @@
train = LINEMOD/cam/train.txt
valid = LINEMOD/cam/test.txt
backup = backup/cam
mesh = LINEMOD/cam/cam.ply
tr_range = LINEMOD/cam/training_range.txt
name = cam
diam = 0.173

7
cfg/can.data Normal file
View File

@@ -0,0 +1,7 @@
train = LINEMOD/can/train.txt
valid = LINEMOD/can/test.txt
backup = backup/can
mesh = LINEMOD/can/can.ply
tr_range = LINEMOD/can/training_range.txt
name = can
diam = 0.202

7
cfg/cat.data Normal file
View File

@@ -0,0 +1,7 @@
train = LINEMOD/cat/train.txt
valid = LINEMOD/cat/test.txt
backup = backup/cat
mesh = LINEMOD/cat/cat.ply
tr_range = LINEMOD/cat/training_range.txt
name = cat
diam = 0.155

7
cfg/driller.data Normal file
View File

@@ -0,0 +1,7 @@
train = LINEMOD/driller/train.txt
valid = LINEMOD/driller/test.txt
backup = backup/driller
mesh = LINEMOD/driller/driller.ply
tr_range = LINEMOD/driller/training_range.txt
name = driller
diam = 0.262

7
cfg/duck.data Normal file
View File

@@ -0,0 +1,7 @@
train = LINEMOD/duck/train.txt
valid = LINEMOD/duck/test.txt
backup = backup/duck
mesh = LINEMOD/duck/duck.ply
tr_range = LINEMOD/duck/training_range.txt
name = duck
diam = 0.109

7
cfg/eggbox.data Normal file
View File

@@ -0,0 +1,7 @@
train = LINEMOD/eggbox/train.txt
valid = LINEMOD/eggbox/test.txt
backup = backup/eggbox
mesh = LINEMOD/eggbox/eggbox.ply
tr_range = LINEMOD/eggbox/training_range.txt
name = eggbox
diam = 0.176364

7
cfg/glue.data Normal file
View File

@@ -0,0 +1,7 @@
train = LINEMOD/glue/train.txt
valid = LINEMOD/glue/test.txt
backup = backup/glue
mesh = LINEMOD/glue/glue.ply
tr_range = LINEMOD/glue/training_range.txt
name = glue
diam = 0.176

7
cfg/holepuncher.data Normal file
View File

@@ -0,0 +1,7 @@
train = LINEMOD/holepuncher/train.txt
valid = LINEMOD/holepuncher/test.txt
backup = backup/holepuncher
mesh = LINEMOD/holepuncher/holepuncher.ply
tr_range = LINEMOD/holepuncher/training_range.txt
name = holepuncher
diam = 0.162

7
cfg/iron.data Normal file
View File

@@ -0,0 +1,7 @@
train = LINEMOD/iron/train.txt
valid = LINEMOD/iron/test.txt
backup = backup/iron
mesh = LINEMOD/iron/iron.ply
tr_range = LINEMOD/iron/training_range.txt
name = iron
diam = 0.303153

7
cfg/lamp.data Normal file
View File

@@ -0,0 +1,7 @@
train = LINEMOD/lamp/train.txt
valid = LINEMOD/lamp/test.txt
backup = backup/lamp
mesh = LINEMOD/lamp/lamp.ply
tr_range = LINEMOD/lamp/training_range.txt
name = lamp
diam = 0.285155

7
cfg/phone.data Normal file
View File

@@ -0,0 +1,7 @@
train = LINEMOD/phone/train.txt
valid = LINEMOD/phone/test.txt
backup = backup/phone
mesh = LINEMOD/phone/phone.ply
tr_range = LINEMOD/phone/training_range.txt
name = phone
diam = 0.213

262
cfg/yolo-pose-pre.cfg Normal file
View File

@@ -0,0 +1,262 @@
[net]
# Testing
batch=32
subdivisions=8
# Training
# batch=64
# subdivisions=8
height=416
width=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 80200
policy=steps
# steps=-1,500,20000,30000
# steps=-1,180,360,540
steps=-1,50,1000,2000
scales=0.1,10,.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
#######
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[route]
layers=-9
[convolutional]
batch_normalize=1
size=1
stride=1
pad=1
filters=64
activation=leaky
[reorg]
stride=2
[route]
layers=-1,-4
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
# filters=125
filters=32
activation=linear
[region]
# anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
anchors = 0.1067, 0.9223
bias_match=1
classes=13
coords=18
num=1
softmax=1
jitter=.3
rescore=1
object_scale=0
noobject_scale=0
class_scale=1
coord_scale=1
absolute=1
thresh = .6
random=1

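A consistency note on the [region] head above: coords=18 corresponds to the 2D image coordinates of 9 control points (the 8 corners of the object's 3D bounding box plus its centroid), and the final 1x1 convolution must emit num * (coords + 1 + classes) channels per cell. A quick check — a sketch, not repo code — against this config and the two that follow:

    def region_filters(num, coords, classes):
        # per-anchor output: coords values + 1 confidence + class scores
        return num * (coords + 1 + classes)

    assert region_filters(1, 18, 13) == 32    # yolo-pose-pre.cfg (filters=32)
    assert region_filters(1, 18, 1) == 20     # yolo-pose.cfg (filters=20)
    assert region_filters(5, 18, 13) == 160   # multi-object cfg (filters=160)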
261
cfg/yolo-pose.cfg Normal file
View File

@@ -0,0 +1,261 @@
[net]
# Testing
batch=32
subdivisions=8
# Training
# batch=64
# subdivisions=8
height=416
width=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 80200
policy=steps
# steps=-1,500,20000,30000
steps=-1,50,3000,6000
scales=0.1,10,.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
#######
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[route]
layers=-9
[convolutional]
batch_normalize=1
size=1
stride=1
pad=1
filters=64
activation=leaky
[reorg]
stride=2
[route]
layers=-1,-4
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
# filters=125
filters=20
activation=linear
[region]
# anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
anchors = 0.1067, 0.9223
bias_match=1
classes=1
coords=18
num=1
softmax=1
jitter=.3
rescore=1
object_scale=5
noobject_scale=0.1
class_scale=1
coord_scale=1
absolute=1
thresh = .6
random=1


Binary data
multi_obj_pose_estimation/cfg/.DS_Store vendored Normal file

Binary file not shown.

View File

@@ -0,0 +1,5 @@
valid = ../LINEMOD/ape/test_occlusion.txt
mesh = ../LINEMOD/ape/ape.ply
backup = backup_multi
name = ape
diam = 0.103

View File

@@ -0,0 +1,23 @@
train = cfg/train_occlusion.txt
valid1 = ../LINEMOD/ape/test_occlusion.txt
valid4 = ../LINEMOD/can/test_occlusion.txt
valid5 = ../LINEMOD/cat/test_occlusion.txt
valid6 = ../LINEMOD/driller/test_occlusion.txt
valid7 = ../LINEMOD/duck/test_occlusion.txt
valid9 = ../LINEMOD/glue/test_occlusion.txt
valid10 = ../LINEMOD/holepuncher/test_occlusion.txt
backup = backup_multi
mesh1 = ../LINEMOD/ape/ape.ply
mesh4 = ../LINEMOD/can/can.ply
mesh5 = ../LINEMOD/cat/cat.ply
mesh6 = ../LINEMOD/driller/driller.ply
mesh7 = ../LINEMOD/duck/duck.ply
mesh9 = ../LINEMOD/glue/glue.ply
mesh10 = ../LINEMOD/holepuncher/holepuncher.ply
diam1 = 0.103
diam4 = 0.202
diam5 = 0.155
diam6 = 0.262
diam7 = 0.109
diam9 = 0.176
diam10 = 0.162

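The multi-object config above indexes its keys (valid1, mesh1, diam1, ...) by object id; the gaps in the numbering (2, 3, 8) correspond to LINEMOD objects not evaluated here. A hypothetical helper to regroup the numbered keys into per-object records — the key names (validN, meshN, diamN) are taken from the file above:

    import re

    def group_objects(options):
        # regroup validN/meshN/diamN entries into one record per object id
        objects = {}
        for key, value in options.items():
            m = re.match(r'(valid|mesh|diam)(\d+)$', key)
            if m:
                objects.setdefault(int(m.group(2)), {})[m.group(1)] = value
        return objects

    # e.g. group_objects(opts)[1] -> {'valid': '../LINEMOD/ape/test_occlusion.txt',
    #                                 'mesh': '../LINEMOD/ape/ape.ply', 'diam': '0.103'}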
View File

@@ -0,0 +1,183 @@
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000024.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000030.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000045.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000053.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000063.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000065.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000071.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000072.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000076.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000078.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000091.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000092.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000095.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000099.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000103.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000106.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000116.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000123.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000130.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000134.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000139.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000146.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000152.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000153.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000155.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000157.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000158.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000161.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000163.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000167.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000172.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000174.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000183.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000200.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000214.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000221.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000226.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000235.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000239.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000243.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000271.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000274.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000277.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000286.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000291.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000294.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000302.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000307.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000314.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000320.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000324.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000347.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000350.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000355.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000364.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000367.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000369.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000376.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000377.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000379.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000383.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000384.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000387.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000394.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000402.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000406.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000410.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000413.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000422.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000425.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000430.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000434.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000441.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000446.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000451.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000456.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000461.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000465.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000471.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000480.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000483.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000493.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000496.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000498.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000507.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000512.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000525.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000527.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000532.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000533.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000534.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000539.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000554.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000556.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000568.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000571.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000573.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000576.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000598.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000603.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000604.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000609.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000627.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000635.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000641.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000649.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000653.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000656.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000659.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000668.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000676.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000692.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000697.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000706.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000715.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000717.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000726.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000735.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000744.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000747.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000752.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000758.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000760.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000772.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000775.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000780.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000785.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000800.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000802.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000828.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000837.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000842.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000845.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000847.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000850.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000859.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000875.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000880.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000883.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000891.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000892.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000915.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000916.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000923.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000931.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000933.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000941.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000945.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000954.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000959.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000964.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000975.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000987.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001002.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001014.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001020.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001024.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001038.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001040.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001048.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001066.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001071.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001081.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001084.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001088.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001102.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001103.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001106.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001112.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001121.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001129.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001133.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001135.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001136.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001157.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001159.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001163.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001171.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001172.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001174.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001191.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001198.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001205.jpg

View File

@@ -0,0 +1,261 @@
[net]
# Testing
batch=32
subdivisions=8
# Training
# batch=64
# subdivisions=8
height=416
width=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 80200
policy=steps
steps=-1,100,20000,30000
# steps=-1,180,360,540
scales=0.1,10,.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
#######
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[route]
layers=-9
[convolutional]
batch_normalize=1
size=1
stride=1
pad=1
filters=64
activation=leaky
[reorg]
stride=2
[route]
layers=-1,-4
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
# filters=125
filters=160
activation=linear
[region]
# anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
anchors = 1.4820, 2.2412, 2.0501, 3.1265, 2.3946, 4.6891, 3.1018, 3.9910, 3.4879, 5.8851
bias_match=1
classes=13
coords=18
num=5
softmax=1
jitter=.3
rescore=1
object_scale=5
noobject_scale=0.1
class_scale=1
coord_scale=1
absolute=1
thresh = .6
random=1

View File

@@ -0,0 +1,5 @@
valid = ../LINEMOD/ape/test_occlusion.txt
mesh = ../LINEMOD/ape/ape.ply
backup = backup_multi
name = ape
diam = 0.103

View File

@@ -0,0 +1,7 @@
train = ../LINEMOD/benchvise/train.txt
valid = ../LINEMOD/benchvise/test.txt
backup = backup_multi
mesh = ../LINEMOD/benchvise/benchvise.ply
tr_range = ../LINEMOD/benchvise/training_range.txt
name = benchvise
diam = 0.286908

View File

@@ -0,0 +1,5 @@
valid = ../LINEMOD/can/test_occlusion.txt
mesh = ../LINEMOD/can/can.ply
backup = backup_multi
name = can
diam = 0.202

View File

@@ -0,0 +1,5 @@
valid = ../LINEMOD/cat/test_occlusion.txt
mesh = ../LINEMOD/cat/cat.ply
backup = backup_multi
name = cat
diam = 0.155

View File

@@ -0,0 +1,5 @@
valid = ../LINEMOD/driller/test_occlusion.txt
mesh = ../LINEMOD/driller/driller.ply
backup = backup_multi
name = driller
diam = 0.262

View File

@@ -0,0 +1,5 @@
valid = ../LINEMOD/duck/test_occlusion.txt
mesh = ../LINEMOD/duck/duck.ply
backup = backup_multi
name = duck
diam = 0.109

View File

@@ -0,0 +1,5 @@
valid = ../LINEMOD/eggbox/test_occlusion.txt
mesh = ../LINEMOD/eggbox/eggbox.ply
backup = backup_multi
name = eggbox
diam = 0.176364

View File

@@ -0,0 +1,5 @@
valid = ../LINEMOD/glue/test_occlusion.txt
mesh = ../LINEMOD/glue/glue.ply
backup = backup_multi
name = glue
diam = 0.176

View File

@@ -0,0 +1,5 @@
valid = ../LINEMOD/holepuncher/test_occlusion.txt
mesh = ../LINEMOD/holepuncher/holepuncher.ply
backup = backup_multi
name = holepuncher
diam = 0.162

View File

@@ -0,0 +1,23 @@
train = cfg/train_occlusion.txt
valid1 = ../LINEMOD/ape/test_occlusion.txt
valid4 = ../LINEMOD/can/test_occlusion.txt
valid5 = ../LINEMOD/cat/test_occlusion.txt
valid6 = ../LINEMOD/driller/test_occlusion.txt
valid7 = ../LINEMOD/duck/test_occlusion.txt
valid9 = ../LINEMOD/glue/test_occlusion.txt
valid10 = ../LINEMOD/holepuncher/test_occlusion.txt
backup = backup_multi
mesh1 = ../LINEMOD/ape/ape.ply
mesh4 = ../LINEMOD/can/can.ply
mesh5 = ../LINEMOD/cat/cat.ply
mesh6 = ../LINEMOD/driller/driller.ply
mesh7 = ../LINEMOD/duck/duck.ply
mesh9 = ../LINEMOD/glue/glue.ply
mesh10 = ../LINEMOD/holepuncher/holepuncher.ply
diam1 = 0.103
diam4 = 0.202
diam5 = 0.155
diam6 = 0.262
diam7 = 0.109
diam9 = 0.176
diam10 = 0.162

View File

@@ -0,0 +1,183 @@
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000024.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000030.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000045.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000053.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000063.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000065.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000071.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000072.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000076.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000078.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000091.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000092.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000095.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000099.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000103.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000106.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000116.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000123.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000130.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000134.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000139.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000146.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000152.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000153.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000155.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000157.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000158.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000161.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000163.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000167.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000172.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000174.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000183.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000200.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000214.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000221.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000226.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000235.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000239.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000243.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000271.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000274.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000277.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000286.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000291.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000294.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000302.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000307.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000314.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000320.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000324.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000347.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000350.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000355.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000364.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000367.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000369.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000376.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000377.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000379.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000383.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000384.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000387.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000394.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000402.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000406.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000410.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000413.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000422.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000425.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000430.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000434.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000441.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000446.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000451.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000456.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000461.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000465.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000471.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000480.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000483.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000493.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000496.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000498.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000507.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000512.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000525.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000527.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000532.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000533.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000534.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000539.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000554.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000556.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000568.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000571.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000573.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000576.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000598.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000603.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000604.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000609.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000627.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000635.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000641.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000649.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000653.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000656.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000659.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000668.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000676.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000692.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000697.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000706.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000715.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000717.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000726.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000735.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000744.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000747.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000752.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000758.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000760.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000772.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000775.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000780.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000785.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000800.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000802.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000828.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000837.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000842.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000845.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000847.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000850.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000859.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000875.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000880.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000883.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000891.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000892.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000915.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000916.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000923.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000931.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000933.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000941.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000945.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000954.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000959.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000964.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000975.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/000987.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001002.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001014.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001020.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001024.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001038.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001040.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001048.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001066.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001071.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001081.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001084.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001088.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001102.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001103.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001106.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001112.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001121.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001129.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001133.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001135.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001136.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001157.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001159.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001163.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001171.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001172.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001174.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001191.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001198.jpg
/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/benchvise/JPEGImages/001205.jpg

View File

@@ -0,0 +1,261 @@
[net]
# Testing
batch=64
subdivisions=8
# Training
# batch=64
# subdivisions=8
height=416
width=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 80200
policy=steps
steps=-1,500,20000,30000
# steps=-1,180,360,540
scales=0.1,10,.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
#######
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[route]
layers=-9
[convolutional]
batch_normalize=1
size=1
stride=1
pad=1
filters=64
activation=leaky
[reorg]
stride=2
[route]
layers=-1,-4
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
# filters=125
filters=160
activation=linear
[region]
# anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
anchors = 1.4820, 2.2412, 2.0501, 3.1265, 2.3946, 4.6891, 3.1018, 3.9910, 3.4879, 5.8851
bias_match=1
classes=13
coords=18
num=5
softmax=1
jitter=.3
rescore=1
object_scale=0
noobject_scale=0
class_scale=1
coord_scale=1
absolute=1
thresh = .6
random=1

View File

@@ -0,0 +1,261 @@
[net]
# Testing
batch=32
subdivisions=8
# Training
# batch=64
# subdivisions=8
height=416
width=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 80200
policy=steps
steps=-1,100,20000,30000
# steps=-1,180,360,540
scales=0.1,10,.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
#######
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[route]
layers=-9
[convolutional]
batch_normalize=1
size=1
stride=1
pad=1
filters=64
activation=leaky
[reorg]
stride=2
[route]
layers=-1,-4
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
# filters=125
filters=160
activation=linear
[region]
# anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071
anchors = 1.4820, 2.2412, 2.0501, 3.1265, 2.3946, 4.6891, 3.1018, 3.9910, 3.4879, 5.8851
bias_match=1
classes=13
coords=18
num=5
softmax=1
jitter=.3
rescore=1
object_scale=5
noobject_scale=0.1
class_scale=1
coord_scale=1
absolute=1
thresh = .6
random=1

View File

@@ -0,0 +1,388 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from region_loss_multi import RegionLoss
from cfg import *
class MaxPoolStride1(nn.Module):
    def __init__(self):
        super(MaxPoolStride1, self).__init__()

    def forward(self, x):
        x = F.max_pool2d(F.pad(x, (0,1,0,1), mode='replicate'), 2, stride=1)
        return x
class Reorg(nn.Module):
    def __init__(self, stride=2):
        super(Reorg, self).__init__()
        self.stride = stride

    def forward(self, x):
        stride = self.stride
        assert(x.data.dim() == 4)
        B = x.data.size(0)
        C = x.data.size(1)
        H = x.data.size(2)
        W = x.data.size(3)
        assert(H % stride == 0)
        assert(W % stride == 0)
        ws = stride
        hs = stride
        # integer division keeps the view sizes ints under Python 3
        x = x.view(B, C, H//hs, hs, W//ws, ws).transpose(3,4).contiguous()
        x = x.view(B, C, (H//hs)*(W//ws), hs*ws).transpose(2,3).contiguous()
        x = x.view(B, C, hs*ws, H//hs, W//ws).transpose(1,2).contiguous()
        x = x.view(B, hs*ws*C, H//hs, W//ws)
        return x
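# With stride=2, Reorg is a space-to-depth rearrangement: a (B, C, H, W) tensor
# becomes (B, 4*C, H//2, W//2), e.g. (1, 64, 26, 26) -> (1, 256, 13, 13).
# This is what lets the later [route] layer concatenate these fine-grained
# features with the coarse 13x13 detection head.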
class GlobalAvgPool2d(nn.Module):
    def __init__(self):
        super(GlobalAvgPool2d, self).__init__()

    def forward(self, x):
        N = x.data.size(0)
        C = x.data.size(1)
        H = x.data.size(2)
        W = x.data.size(3)
        x = F.avg_pool2d(x, (H, W))
        x = x.view(N, C)
        return x
# for route and shortcut
class EmptyModule(nn.Module):
    def __init__(self):
        super(EmptyModule, self).__init__()

    def forward(self, x):
        return x
# support route shortcut and reorg
class Darknet(nn.Module):
    def __init__(self, cfgfile):
        super(Darknet, self).__init__()
        self.blocks = parse_cfg(cfgfile)
        self.models = self.create_network(self.blocks)  # merge conv, bn, leaky
        self.loss = self.models[len(self.models)-1]
        self.width = int(self.blocks[0]['width'])
        self.height = int(self.blocks[0]['height'])
        if self.blocks[(len(self.blocks)-1)]['type'] == 'region':
            self.anchors = self.loss.anchors
            self.num_anchors = self.loss.num_anchors
            self.anchor_step = self.loss.anchor_step
            self.num_classes = self.loss.num_classes
        self.header = torch.IntTensor([0,0,0,0])
        self.seen = 0
        self.iter = 0
    def forward(self, x):
        ind = -2
        self.loss = None
        outputs = dict()
        for block in self.blocks:
            ind = ind + 1
            #if ind > 0:
            #    return x
            if block['type'] == 'net':
                continue
            elif block['type'] in ['convolutional', 'maxpool', 'reorg', 'avgpool', 'softmax', 'connected']:
                x = self.models[ind](x)
                outputs[ind] = x
            elif block['type'] == 'route':
                layers = block['layers'].split(',')
                layers = [int(i) if int(i) > 0 else int(i)+ind for i in layers]
                if len(layers) == 1:
                    x = outputs[layers[0]]
                    outputs[ind] = x
                elif len(layers) == 2:
                    x1 = outputs[layers[0]]
                    x2 = outputs[layers[1]]
                    x = torch.cat((x1,x2),1)
                    outputs[ind] = x
            elif block['type'] == 'shortcut':
                from_layer = int(block['from'])
                activation = block['activation']
                from_layer = from_layer if from_layer > 0 else from_layer + ind
                x1 = outputs[from_layer]
                x2 = outputs[ind-1]
                x = x1 + x2
                if activation == 'leaky':
                    x = F.leaky_relu(x, 0.1, inplace=True)
                elif activation == 'relu':
                    x = F.relu(x, inplace=True)
                outputs[ind] = x
            elif block['type'] == 'region':
                # the region loss is applied outside forward();
                # the code after this continue is intentionally skipped
                continue
                if self.loss:
                    self.loss = self.loss + self.models[ind](x)
                else:
                    self.loss = self.models[ind](x)
                outputs[ind] = None
            elif block['type'] == 'cost':
                continue
            else:
                print('unknown type %s' % (block['type']))
        return x
    def print_network(self):
        print_cfg(self.blocks)
    def create_network(self, blocks):
        models = nn.ModuleList()
        prev_filters = 3
        out_filters = []
        conv_id = 0
        for block in blocks:
            if block['type'] == 'net':
                prev_filters = int(block['channels'])
                continue
            elif block['type'] == 'convolutional':
                conv_id = conv_id + 1
                batch_normalize = int(block['batch_normalize'])
                filters = int(block['filters'])
                kernel_size = int(block['size'])
                stride = int(block['stride'])
                is_pad = int(block['pad'])
                pad = (kernel_size-1)//2 if is_pad else 0  # integer division: Conv2d padding must be an int
                activation = block['activation']
                model = nn.Sequential()
                if batch_normalize:
                    model.add_module('conv{0}'.format(conv_id), nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=False))
                    model.add_module('bn{0}'.format(conv_id), nn.BatchNorm2d(filters, eps=1e-4))
                    #model.add_module('bn{0}'.format(conv_id), BN2d(filters))
                else:
                    model.add_module('conv{0}'.format(conv_id), nn.Conv2d(prev_filters, filters, kernel_size, stride, pad))
                if activation == 'leaky':
                    model.add_module('leaky{0}'.format(conv_id), nn.LeakyReLU(0.1, inplace=True))
                elif activation == 'relu':
                    model.add_module('relu{0}'.format(conv_id), nn.ReLU(inplace=True))
                prev_filters = filters
                out_filters.append(prev_filters)
                models.append(model)
            elif block['type'] == 'maxpool':
                pool_size = int(block['size'])
                stride = int(block['stride'])
                if stride > 1:
                    model = nn.MaxPool2d(pool_size, stride)
                else:
                    model = MaxPoolStride1()
                out_filters.append(prev_filters)
                models.append(model)
            elif block['type'] == 'avgpool':
                model = GlobalAvgPool2d()
                out_filters.append(prev_filters)
                models.append(model)
            elif block['type'] == 'softmax':
                model = nn.Softmax()
                out_filters.append(prev_filters)
                models.append(model)
            elif block['type'] == 'cost':
                if block['_type'] == 'sse':
                    model = nn.MSELoss(size_average=True)
                elif block['_type'] == 'L1':
                    model = nn.L1Loss(size_average=True)
                elif block['_type'] == 'smooth':
                    model = nn.SmoothL1Loss(size_average=True)
                out_filters.append(1)
                models.append(model)
            elif block['type'] == 'reorg':
                stride = int(block['stride'])
                prev_filters = stride * stride * prev_filters
                out_filters.append(prev_filters)
                models.append(Reorg(stride))
            elif block['type'] == 'route':
                layers = block['layers'].split(',')
                ind = len(models)
                layers = [int(i) if int(i) > 0 else int(i)+ind for i in layers]
                if len(layers) == 1:
                    prev_filters = out_filters[layers[0]]
                elif len(layers) == 2:
                    assert(layers[0] == ind - 1)
                    prev_filters = out_filters[layers[0]] + out_filters[layers[1]]
                out_filters.append(prev_filters)
                models.append(EmptyModule())
            elif block['type'] == 'shortcut':
                ind = len(models)
                prev_filters = out_filters[ind-1]
                out_filters.append(prev_filters)
                models.append(EmptyModule())
            elif block['type'] == 'connected':
                filters = int(block['output'])
                if block['activation'] == 'linear':
                    model = nn.Linear(prev_filters, filters)
                elif block['activation'] == 'leaky':
                    model = nn.Sequential(
                        nn.Linear(prev_filters, filters),
                        nn.LeakyReLU(0.1, inplace=True))
                elif block['activation'] == 'relu':
                    model = nn.Sequential(
                        nn.Linear(prev_filters, filters),
                        nn.ReLU(inplace=True))
                prev_filters = filters
                out_filters.append(prev_filters)
                models.append(model)
            elif block['type'] == 'region':
                loss = RegionLoss()
                anchors = block['anchors'].split(',')
                loss.anchors = [float(i) for i in anchors]
                loss.num_classes = int(block['classes'])
                loss.num_anchors = int(block['num'])
                loss.anchor_step = len(loss.anchors)//loss.num_anchors  # integer step (2 for w,h pairs)
                loss.object_scale = float(block['object_scale'])
                loss.noobject_scale = float(block['noobject_scale'])
                loss.class_scale = float(block['class_scale'])
                loss.coord_scale = float(block['coord_scale'])
                out_filters.append(prev_filters)
                models.append(loss)
            else:
                print('unknown type %s' % (block['type']))
        return models
    def load_weights(self, weightfile):
        fp = open(weightfile, 'rb')
        header = np.fromfile(fp, count=4, dtype=np.int32)
        self.header = torch.from_numpy(header)
        self.seen = self.header[3]
        buf = np.fromfile(fp, dtype=np.float32)
        fp.close()
        start = 0
        ind = -2
        for block in self.blocks:
            if start >= buf.size:
                break
            ind = ind + 1
            if block['type'] == 'net':
                continue
            elif block['type'] == 'convolutional':
                model = self.models[ind]
                batch_normalize = int(block['batch_normalize'])
                if batch_normalize:
                    start = load_conv_bn(buf, start, model[0], model[1])
                else:
                    start = load_conv(buf, start, model[0])
            elif block['type'] == 'connected':
                model = self.models[ind]
                if block['activation'] != 'linear':
                    start = load_fc(buf, start, model[0])
                else:
                    start = load_fc(buf, start, model)
            elif block['type'] in ['maxpool', 'reorg', 'route', 'shortcut', 'region', 'avgpool', 'softmax', 'cost']:
                pass
            else:
                print('unknown type %s' % (block['type']))
    def load_weights_until_last(self, weightfile):
        # same as load_weights, but stops before the last (task-specific) blocks
        fp = open(weightfile, 'rb')
        header = np.fromfile(fp, count=4, dtype=np.int32)
        self.header = torch.from_numpy(header)
        self.seen = self.header[3]
        buf = np.fromfile(fp, dtype=np.float32)
        fp.close()
        start = 0
        ind = -2
        blocklen = len(self.blocks)
        for i in range(blocklen-2):
            block = self.blocks[i]
            if start >= buf.size:
                break
            ind = ind + 1
            if block['type'] == 'net':
                continue
            elif block['type'] == 'convolutional':
                model = self.models[ind]
                batch_normalize = int(block['batch_normalize'])
                if batch_normalize:
                    start = load_conv_bn(buf, start, model[0], model[1])
                else:
                    start = load_conv(buf, start, model[0])
            elif block['type'] == 'connected':
                model = self.models[ind]
                if block['activation'] != 'linear':
                    start = load_fc(buf, start, model[0])
                else:
                    start = load_fc(buf, start, model)
            elif block['type'] in ['maxpool', 'reorg', 'route', 'shortcut', 'region', 'avgpool', 'softmax', 'cost']:
                pass
            else:
                print('unknown type %s' % (block['type']))
def save_weights(self, outfile, cutoff=0):
if cutoff <= 0:
cutoff = len(self.blocks)-1
fp = open(outfile, 'wb')
self.header[3] = self.seen
header = self.header
header.numpy().tofile(fp)
ind = -1
for blockId in range(1, cutoff+1):
ind = ind + 1
block = self.blocks[blockId]
if block['type'] == 'convolutional':
model = self.models[ind]
batch_normalize = int(block['batch_normalize'])
if batch_normalize:
save_conv_bn(fp, model[0], model[1])
else:
save_conv(fp, model[0])
elif block['type'] == 'connected':
model = self.models[ind]
# mirror load_weights above: model is a Sequential (Linear + activation)
# when the activation is not linear; also write to fp, not the undefined fc
if block['activation'] != 'linear':
save_fc(fp, model[0])
else:
save_fc(fp, model)
elif block['type'] == 'maxpool':
pass
elif block['type'] == 'reorg':
pass
elif block['type'] == 'route':
pass
elif block['type'] == 'shortcut':
pass
elif block['type'] == 'region':
pass
elif block['type'] == 'avgpool':
pass
elif block['type'] == 'softmax':
pass
elif block['type'] == 'cost':
pass
else:
print('unknown type %s' % (block['type']))
fp.close()
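# Round-trip sketch (illustrative paths): the header's 'seen' counter is
# refreshed from self.seen before the tensors are streamed back out, so a
# save after training resumes cleanly on the next load.
# net = Darknet('cfg/yolo-pose-multi.cfg')
# net.load_weights('backup_multi/init.weights')   # placeholder weight file
# net.save_weights('backup_multi/model.weights')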


@@ -0,0 +1,94 @@
#!/usr/bin/python
# encoding: utf-8
import os
import random
import torch
import numpy as np
from PIL import Image
from torch.utils.data import Dataset
from utils import read_truths_args, read_truths, get_all_files
from image_multi import *
class listDataset(Dataset):
def __init__(self, root, shape=None, shuffle=True, transform=None, objclass=None, target_transform=None, train=False, seen=0, batch_size=64, num_workers=4, bg_file_names=None): # bg='/cvlabdata1/home/btekin/ope/data/office_bg'
with open(root, 'r') as file:
self.lines = file.readlines()
if shuffle:
random.shuffle(self.lines)
self.nSamples = len(self.lines)
self.transform = transform
self.target_transform = target_transform
self.train = train
self.shape = shape
self.seen = seen
self.batch_size = batch_size
self.num_workers = num_workers
# self.bg_file_names = get_all_files(bg)
self.bg_file_names = bg_file_names
self.objclass = objclass
def __len__(self):
return self.nSamples
def __getitem__(self, index):
assert index < len(self), 'index range error'
imgpath = self.lines[index].rstrip()
if self.train and index % 64 == 0:
if self.seen < 4000*64:
width = 13*32
self.shape = (width, width)
elif self.seen < 8000*64:
width = (random.randint(0,3) + 13)*32
self.shape = (width, width)
elif self.seen < 12000*64:
width = (random.randint(0,5) + 12)*32
self.shape = (width, width)
elif self.seen < 16000*64:
width = (random.randint(0,7) + 11)*32
self.shape = (width, width)
else: # self.seen < 20000*64:
width = (random.randint(0,9) + 10)*32
self.shape = (width, width)
if self.train:
# jitter = 0.2
jitter = 0.1
hue = 0.05
saturation = 1.5
exposure = 1.5
# Get background image path
random_bg_index = random.randint(0, len(self.bg_file_names) - 1)
bgpath = self.bg_file_names[random_bg_index]
img, label = load_data_detection(imgpath, self.shape, jitter, hue, saturation, exposure, bgpath)
label = torch.from_numpy(label)
else:
img = Image.open(imgpath).convert('RGB')
if self.shape:
img = img.resize(self.shape)
labpath = imgpath.replace('benchvise', self.objclass).replace('images', 'labels_occlusion').replace('JPEGImages', 'labels_occlusion').replace('.jpg', '.txt').replace('.png','.txt')
label = torch.zeros(50*21)
if os.path.getsize(labpath):
ow, oh = img.size
tmp = torch.from_numpy(read_truths_args(labpath, 8.0/ow))
tmp = tmp.view(-1)
tsz = tmp.numel()
if tsz > 50*21:
label = tmp[0:50*21]
elif tsz > 0:
label[0:tsz] = tmp
if self.transform is not None:
img = self.transform(img)
if self.target_transform is not None:
label = self.target_transform(label)
self.seen = self.seen + self.num_workers
return (img, label)
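# Minimal usage sketch (paths are placeholders). In training mode the dataset
# rescales itself to multiples of 32 (base 13*32 = 416); in eval mode pass
# objclass so the occlusion label paths resolve:
# dataset = listDataset('LINEMOD/ape/test.txt', shape=(416, 416),
#                       shuffle=False, train=False, objclass='ape',
#                       transform=transforms.ToTensor())
# loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)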


@@ -0,0 +1,450 @@
#!/usr/bin/python
# encoding: utf-8
import random
import os
from PIL import Image, ImageChops, ImageMath
import numpy as np
def load_data_detection_backup(imgpath, shape, jitter, hue, saturation, exposure, bgpath):
labpath = imgpath.replace('images', 'labels').replace('JPEGImages', 'labels').replace('.jpg', '.txt').replace('.png','.txt')
maskpath = imgpath.replace('JPEGImages', 'mask').replace('/00', '/').replace('.jpg', '.png')
## data augmentation
img = Image.open(imgpath).convert('RGB')
mask = Image.open(maskpath).convert('RGB')
bg = Image.open(bgpath).convert('RGB')
img = change_background(img, mask, bg)
img,flip,dx,dy,sx,sy = data_augmentation(img, shape, jitter, hue, saturation, exposure)
label = fill_truth_detection(labpath, img.width, img.height, flip, dx, dy, 1./sx, 1./sy)
return img,label
def get_add_objs(objname):
# Decide how many additional objects to augment and which other object types to use
if objname == 'ape':
add_objs = ['can', 'cat', 'duck', 'glue', 'holepuncher', 'iron', 'phone'] # eggbox
elif objname == 'benchvise':
add_objs = ['ape', 'can', 'cat', 'driller', 'duck', 'glue', 'holepuncher']
elif objname == 'cam':
add_objs = ['ape', 'benchvise', 'can', 'cat', 'driller', 'duck', 'holepuncher']
elif objname == 'can':
add_objs = ['ape', 'benchvise', 'cat', 'driller', 'duck', 'eggbox', 'holepuncher']
elif objname == 'cat':
add_objs = ['ape', 'can', 'duck', 'glue', 'holepuncher', 'eggbox', 'phone']
elif objname == 'driller':
add_objs = ['ape', 'benchvise', 'can', 'cat', 'duck', 'glue', 'holepuncher']
elif objname == 'duck':
add_objs = ['ape', 'can', 'cat', 'eggbox', 'glue', 'holepuncher', 'phone']
elif objname == 'eggbox':
add_objs = ['ape', 'benchvise', 'cam', 'can', 'cat', 'duck', 'glue', 'holepuncher']
elif objname == 'glue':
add_objs = ['ape', 'benchvise', 'cam', 'driller', 'duck', 'eggbox', 'holepuncher' ]
elif objname == 'holepuncher':
add_objs = ['benchvise', 'cam', 'can', 'cat', 'driller', 'duck', 'eggbox']
elif objname == 'iron':
add_objs = ['ape', 'benchvise', 'can', 'cat', 'driller', 'duck', 'glue']
elif objname == 'lamp':
add_objs = ['ape', 'benchvise', 'can', 'driller', 'eggbox', 'holepuncher', 'iron']
elif objname == 'phone':
add_objs = ['ape', 'benchvise', 'cam', 'can', 'driller', 'duck', 'holepuncher']
return add_objs
def mask_background(img, mask):
ow, oh = img.size
imcs = list(img.split())
maskcs = list(mask.split())
fics = list(Image.new(img.mode, img.size).split())
for c in range(len(imcs)):
negmask = maskcs[c].point(lambda i: 1 - i / 255)
posmask = maskcs[c].point(lambda i: i / 255)
fics[c] = ImageMath.eval("a * c", a=imcs[c], c=posmask).convert('L')
out = Image.merge(img.mode, tuple(fics))
return out
def scale_image_channel(im, c, v):
cs = list(im.split())
cs[c] = cs[c].point(lambda i: i * v)
out = Image.merge(im.mode, tuple(cs))
return out
def distort_image(im, hue, sat, val):
im = im.convert('HSV')
cs = list(im.split())
cs[1] = cs[1].point(lambda i: i * sat)
cs[2] = cs[2].point(lambda i: i * val)
def change_hue(x):
x += hue*255
if x > 255:
x -= 255
if x < 0:
x += 255
return x
cs[0] = cs[0].point(change_hue)
im = Image.merge(im.mode, tuple(cs))
im = im.convert('RGB')
#constrain_image(im)
return im
def rand_scale(s):
scale = random.uniform(1, s)
if(random.randint(1,10000)%2):
return scale
return 1./scale
def random_distort_image(im, hue, saturation, exposure):
dhue = random.uniform(-hue, hue)
dsat = rand_scale(saturation)
dexp = rand_scale(exposure)
res = distort_image(im, dhue, dsat, dexp)
return res
def data_augmentation(img, shape, jitter, hue, saturation, exposure):
oh = img.height
ow = img.width
dw =int(ow*jitter)
dh =int(oh*jitter)
pleft = random.randint(-dw, dw)
pright = random.randint(-dw, dw)
ptop = random.randint(-dh, dh)
pbot = random.randint(-dh, dh)
swidth = ow - pleft - pright
sheight = oh - ptop - pbot
sx = float(swidth) / ow
sy = float(sheight) / oh
flip = random.randint(1,10000)%2
cropped = img.crop( (pleft, ptop, pleft + swidth - 1, ptop + sheight - 1))
dx = (float(pleft)/ow)/sx
dy = (float(ptop) /oh)/sy
sized = cropped.resize(shape)
if flip:
sized = sized.transpose(Image.FLIP_LEFT_RIGHT)
img = random_distort_image(sized, hue, saturation, exposure)
return img, flip, dx,dy,sx,sy
def fill_truth_detection(labpath, w, h, flip, dx, dy, sx, sy):
max_boxes = 50
label = np.zeros((max_boxes,21))
if os.path.getsize(labpath):
bs = np.loadtxt(labpath)
if bs is None:
return label
bs = np.reshape(bs, (-1, 21))
cc = 0
for i in range(bs.shape[0]):
x0 = bs[i][1]
y0 = bs[i][2]
x1 = bs[i][3]
y1 = bs[i][4]
x2 = bs[i][5]
y2 = bs[i][6]
x3 = bs[i][7]
y3 = bs[i][8]
x4 = bs[i][9]
y4 = bs[i][10]
x5 = bs[i][11]
y5 = bs[i][12]
x6 = bs[i][13]
y6 = bs[i][14]
x7 = bs[i][15]
y7 = bs[i][16]
x8 = bs[i][17]
y8 = bs[i][18]
x0 = min(0.999, max(0, x0 * sx - dx))
y0 = min(0.999, max(0, y0 * sy - dy))
x1 = min(0.999, max(0, x1 * sx - dx))
y1 = min(0.999, max(0, y1 * sy - dy))
x2 = min(0.999, max(0, x2 * sx - dx))
y2 = min(0.999, max(0, y2 * sy - dy))
x3 = min(0.999, max(0, x3 * sx - dx))
y3 = min(0.999, max(0, y3 * sy - dy))
x4 = min(0.999, max(0, x4 * sx - dx))
y4 = min(0.999, max(0, y4 * sy - dy))
x5 = min(0.999, max(0, x5 * sx - dx))
y5 = min(0.999, max(0, y5 * sy - dy))
x6 = min(0.999, max(0, x6 * sx - dx))
y6 = min(0.999, max(0, y6 * sy - dy))
x7 = min(0.999, max(0, x7 * sx - dx))
y7 = min(0.999, max(0, y7 * sy - dy))
x8 = min(0.999, max(0, x8 * sx - dx))
y8 = min(0.999, max(0, y8 * sy - dy))
# bs[i][0] (the class id) passes through unchanged
bs[i][1] = x0
bs[i][2] = y0
bs[i][3] = x1
bs[i][4] = y1
bs[i][5] = x2
bs[i][6] = y2
bs[i][7] = x3
bs[i][8] = y3
bs[i][9] = x4
bs[i][10] = y4
bs[i][11] = x5
bs[i][12] = y5
bs[i][13] = x6
bs[i][14] = y6
bs[i][15] = x7
bs[i][16] = y7
bs[i][17] = x8
bs[i][18] = y8
xs = [x1, x2, x3, x4, x5, x6, x7, x8]
ys = [y1, y2, y3, y4, y5, y6, y7, y8]
min_x = min(xs);
max_x = max(xs);
min_y = min(ys);
max_y = max(ys);
bs[i][19] = max_x - min_x;
bs[i][20] = max_y - min_y;
if flip:
bs[i][1] = 0.999 - bs[i][1]
bs[i][3] = 0.999 - bs[i][3]
bs[i][5] = 0.999 - bs[i][5]
bs[i][7] = 0.999 - bs[i][7]
bs[i][9] = 0.999 - bs[i][9]
bs[i][11] = 0.999 - bs[i][11]
bs[i][13] = 0.999 - bs[i][13]
bs[i][15] = 0.999 - bs[i][15]
bs[i][17] = 0.999 - bs[i][17]
label[cc] = bs[i]
cc += 1
if cc >= 50:
break
label = np.reshape(label, (-1))
return label
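# Arithmetic check of the transform above (no I/O). data_augmentation()
# returns sx = swidth/ow and dx = (pleft/ow)/sx, and the callers pass 1./sx
# in here, so each normalized coordinate becomes
#     x' = x*(ow/swidth) - pleft/swidth = (x*ow - pleft)/swidth,
# i.e. the same pixel re-normalized inside the cropped window. Example with
# ow=640, pleft=64, swidth=544: x=0.5 maps to (320-64)/544 = 0.4706.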
def change_background(img, mask, bg):
ow, oh = img.size
bg = bg.resize((ow, oh)).convert('RGB')
imcs = list(img.split())
bgcs = list(bg.split())
maskcs = list(mask.split())
fics = list(Image.new(img.mode, img.size).split())
for c in range(len(imcs)):
negmask = maskcs[c].point(lambda i: 1 - i / 255)
posmask = maskcs[c].point(lambda i: i / 255)
fics[c] = ImageMath.eval("a * c + b * d", a=imcs[c], b=bgcs[c], c=posmask, d=negmask).convert('L')
out = Image.merge(img.mode, tuple(fics))
return out
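# change_background() is per-channel soft alpha compositing with the object
# mask as alpha: out = img*(mask/255) + bg*(1 - mask/255), evaluated by
# ImageMath once per RGB channel.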
def shifted_data_augmentation_with_mask(img, mask, shape, jitter, hue, saturation, exposure):
ow, oh = img.size
dw =int(ow*jitter)
dh =int(oh*jitter)
pleft = random.randint(-dw, dw)
pright = random.randint(-dw, dw)
ptop = random.randint(-dh, dh)
pbot = random.randint(-dh, dh)
swidth = ow - pleft - pright
sheight = oh - ptop - pbot
sx = float(swidth) / ow
sy = float(sheight) / oh
flip = random.randint(1,10000)%2
cropped = img.crop( (pleft, ptop, pleft + swidth - 1, ptop + sheight - 1))
mask_cropped = mask.crop( (pleft, ptop, pleft + swidth - 1, ptop + sheight - 1))
cw, ch = cropped.size
shift_x = random.randint(-80, 80)
shift_y = random.randint(-80, 80)
dx = (float(pleft)/ow)/sx - (float(shift_x)/shape[0]) # FIX HERE
dy = (float(ptop) /oh)/sy - (float(shift_y)/shape[1]) # FIX HERE
# dx = (float(pleft)/ow)/sx - (float(shift_x)/ow)
# dy = (float(ptop) /oh)/sy - (float(shift_y)/oh)
sized = cropped.resize(shape)
mask_sized = mask_cropped.resize(shape)
sized = ImageChops.offset(sized, shift_x, shift_y)
mask_sized = ImageChops.offset(mask_sized, shift_x, shift_y)
if flip:
sized = sized.transpose(Image.FLIP_LEFT_RIGHT)
mask_sized = mask_sized.transpose(Image.FLIP_LEFT_RIGHT)
img = sized
mask = mask_sized
return img, mask, flip, dx,dy,sx,sy
def data_augmentation_with_mask(img, mask, shape, jitter, hue, saturation, exposure):
ow, oh = img.size
dw =int(ow*jitter)
dh =int(oh*jitter)
pleft = random.randint(-dw, dw)
pright = random.randint(-dw, dw)
ptop = random.randint(-dh, dh)
pbot = random.randint(-dh, dh)
swidth = ow - pleft - pright
sheight = oh - ptop - pbot
sx = float(swidth) / ow
sy = float(sheight) / oh
flip = random.randint(1,10000)%2
cropped = img.crop( (pleft, ptop, pleft + swidth - 1, ptop + sheight - 1))
mask_cropped = mask.crop( (pleft, ptop, pleft + swidth - 1, ptop + sheight - 1))
dx = (float(pleft)/ow)/sx
dy = (float(ptop) /oh)/sy
sized = cropped.resize(shape)
mask_sized = mask_cropped.resize(shape)
if flip:
sized = sized.transpose(Image.FLIP_LEFT_RIGHT)
mask_sized = mask_sized.transpose(Image.FLIP_LEFT_RIGHT)
img = sized
mask = mask_sized
return img, mask, flip, dx,dy,sx,sy
def superimpose_masked_imgs(masked_img, mask, total_mask):
ow, oh = masked_img.size
total_mask = total_mask.resize((ow, oh)).convert('RGB')
imcs = list(masked_img.split())
bgcs = list(total_mask.split())
maskcs = list(mask.split())
fics = list(Image.new(masked_img.mode, masked_img.size).split())
for c in range(len(imcs)):
negmask = maskcs[c].point(lambda i: 1 - i / 255)
posmask = maskcs[c].point(lambda i: i / 255)
fics[c] = ImageMath.eval("a * c + b * d", a=imcs[c], b=bgcs[c], c=posmask, d=negmask).convert('L')
out = Image.merge(masked_img.mode, tuple(fics))
return out
def superimpose_masks(mask, total_mask):
# bg: total_mask
ow, oh = mask.size
total_mask = total_mask.resize((ow, oh)).convert('RGB')
total_maskcs = list(total_mask.split())
maskcs = list(mask.split())
fics = list(Image.new(mask.mode, mask.size).split())
for c in range(len(maskcs)):
negmask = maskcs[c].point(lambda i: 1 - i / 255)
posmask = maskcs[c].point(lambda i: i)
fics[c] = ImageMath.eval("c + b * d", b=total_maskcs[c], c=posmask, d=negmask).convert('L')
out = Image.merge(mask.mode, tuple(fics))
return out
def augment_objects(imgpath, objname, add_objs, shape, jitter, hue, saturation, exposure):
pixelThreshold = 200
random.shuffle(add_objs)
labpath = imgpath.replace('images', 'labels').replace('JPEGImages', 'labels').replace('.jpg', '.txt').replace('.png','.txt')
maskpath = imgpath.replace('JPEGImages', 'mask').replace('/00', '/').replace('.jpg', '.png')
# Read the image and the mask
img = Image.open(imgpath).convert('RGB')
iw, ih = img.size
mask = Image.open(maskpath).convert('RGB')
img,mask,flip,dx,dy,sx,sy = shifted_data_augmentation_with_mask(img, mask, shape, jitter, hue, saturation, exposure)
label = fill_truth_detection(labpath, iw, ih, flip, dx, dy, 1./sx, 1./sy)
total_label = np.reshape(label, (-1, 21))
# Mask the background
masked_img = mask_background(img, mask)
mask = mask.resize(shape)
masked_img = masked_img.resize(shape)
# Initialize the total mask and total masked image
total_mask = mask
total_masked_img = masked_img
count = 1
for obj in add_objs:
successful = False
while not successful:
objpath = '/cvlabdata1/home/btekin/ope/yolo6D/LINEMOD/' + obj + '/train.txt'
with open(objpath, 'r') as objfile:
objlines = objfile.readlines()
rand_index = random.randint(0, len(objlines) - 1)
obj_rand_img_path = objlines[rand_index].rstrip()
obj_rand_mask_path = obj_rand_img_path.replace('JPEGImages', 'mask').replace('/00', '/').replace('.jpg', '.png')
obj_rand_lab_path = obj_rand_img_path.replace('images', 'labels').replace('JPEGImages', 'labels').replace('.jpg', '.txt').replace('.png','.txt')
obj_rand_img = Image.open(obj_rand_img_path).convert('RGB')
obj_rand_mask = Image.open(obj_rand_mask_path).convert('RGB')
obj_rand_masked_img = mask_background(obj_rand_img, obj_rand_mask)
obj_rand_masked_img,obj_rand_mask,flip,dx,dy,sx,sy = data_augmentation_with_mask(obj_rand_masked_img, obj_rand_mask, shape, jitter, hue, saturation, exposure)
obj_rand_label = fill_truth_detection(obj_rand_lab_path, iw, ih, flip, dx, dy, 1./sx, 1./sy)
# compute intersection (ratio of the object part intersecting with other object parts over the area of the object)
xx = np.array(obj_rand_mask)
xx = np.where(xx > pixelThreshold, 1, 0)
yy = np.array(total_mask)
yy = np.where(yy > pixelThreshold, 1, 0)
intersection = (xx * yy)
if (np.sum(xx) < 0.01) and (np.sum(xx) > -0.01):
successful = False
continue
intersection_ratio = float(np.sum(intersection)) / float(np.sum(xx))
if intersection_ratio < 0.2:
successful = True
total_mask = superimpose_masks(obj_rand_mask, total_mask) # total_mask + obj_rand_mask
total_masked_img = superimpose_masked_imgs(obj_rand_masked_img, obj_rand_mask, total_masked_img) # total_masked_img + obj_rand_masked_img
obj_rand_label = np.reshape(obj_rand_label, (-1, 21))
total_label[count, :] = obj_rand_label[0, :]
count = count + 1
else:
successful = False
total_masked_img = superimpose_masked_imgs(masked_img, mask, total_masked_img)
return total_masked_img, np.reshape(total_label, (-1)), total_mask
def load_data_detection(imgpath, shape, jitter, hue, saturation, exposure, bgpath):
# Read the background image
bg = Image.open(bgpath).convert('RGB')
# Understand which object it is and get the neighboring objects
dirname = os.path.dirname(os.path.dirname(imgpath)) ## dir of dir of file
objname = os.path.basename(dirname)
add_objs = get_add_objs(objname)
# Add additional objects in the scene, apply data augmentation on the objects
total_masked_img, label, total_mask = augment_objects(imgpath, objname, add_objs, shape, jitter, hue, saturation, exposure)
img = change_background(total_masked_img, total_mask, bg)
return img, label


@@ -0,0 +1,309 @@
import time
import torch
import math
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from utils import *
def build_targets(pred_corners, target, anchors, num_anchors, num_classes, nH, nW, noobject_scale, object_scale, sil_thresh, seen):
nB = target.size(0)
nA = num_anchors
nC = num_classes
anchor_step = len(anchors)/num_anchors
conf_mask = torch.ones(nB, nA, nH, nW) * noobject_scale
coord_mask = torch.zeros(nB, nA, nH, nW)
cls_mask = torch.zeros(nB, nA, nH, nW)
tx0 = torch.zeros(nB, nA, nH, nW)
ty0 = torch.zeros(nB, nA, nH, nW)
tx1 = torch.zeros(nB, nA, nH, nW)
ty1 = torch.zeros(nB, nA, nH, nW)
tx2 = torch.zeros(nB, nA, nH, nW)
ty2 = torch.zeros(nB, nA, nH, nW)
tx3 = torch.zeros(nB, nA, nH, nW)
ty3 = torch.zeros(nB, nA, nH, nW)
tx4 = torch.zeros(nB, nA, nH, nW)
ty4 = torch.zeros(nB, nA, nH, nW)
tx5 = torch.zeros(nB, nA, nH, nW)
ty5 = torch.zeros(nB, nA, nH, nW)
tx6 = torch.zeros(nB, nA, nH, nW)
ty6 = torch.zeros(nB, nA, nH, nW)
tx7 = torch.zeros(nB, nA, nH, nW)
ty7 = torch.zeros(nB, nA, nH, nW)
tx8 = torch.zeros(nB, nA, nH, nW)
ty8 = torch.zeros(nB, nA, nH, nW)
tconf = torch.zeros(nB, nA, nH, nW)
tcls = torch.zeros(nB, nA, nH, nW)
nAnchors = nA*nH*nW
nPixels = nH*nW
for b in xrange(nB):
cur_pred_corners = pred_corners[b*nAnchors:(b+1)*nAnchors].t()
cur_confs = torch.zeros(nAnchors)
for t in xrange(50):
if target[b][t*21+1] == 0:
break
gx0 = target[b][t*21+1]*nW
gy0 = target[b][t*21+2]*nH
gx1 = target[b][t*21+3]*nW
gy1 = target[b][t*21+4]*nH
gx2 = target[b][t*21+5]*nW
gy2 = target[b][t*21+6]*nH
gx3 = target[b][t*21+7]*nW
gy3 = target[b][t*21+8]*nH
gx4 = target[b][t*21+9]*nW
gy4 = target[b][t*21+10]*nH
gx5 = target[b][t*21+11]*nW
gy5 = target[b][t*21+12]*nH
gx6 = target[b][t*21+13]*nW
gy6 = target[b][t*21+14]*nH
gx7 = target[b][t*21+15]*nW
gy7 = target[b][t*21+16]*nH
gx8 = target[b][t*21+17]*nW
gy8 = target[b][t*21+18]*nH
cur_gt_corners = torch.FloatTensor([gx0/nW,gy0/nH,gx1/nW,gy1/nH,gx2/nW,gy2/nH,gx3/nW,gy3/nH,gx4/nW,gy4/nH,gx5/nW,gy5/nH,gx6/nW,gy6/nH,gx7/nW,gy7/nH,gx8/nW,gy8/nH]).repeat(nAnchors,1).t() # 16 x nAnchors
cur_confs = torch.max(cur_confs, corner_confidences9(cur_pred_corners, cur_gt_corners)) # some irrelevant areas are filtered, in the same grid multiple anchor boxes might exceed the threshold
conf_mask[b][cur_confs>sil_thresh] = 0
if seen < -1: # warm-up branch intentionally disabled (was: seen < 6400)
tx0.fill_(0.5)
ty0.fill_(0.5)
tx1.fill_(0.5)
ty1.fill_(0.5)
tx2.fill_(0.5)
ty2.fill_(0.5)
tx3.fill_(0.5)
ty3.fill_(0.5)
tx4.fill_(0.5)
ty4.fill_(0.5)
tx5.fill_(0.5)
ty5.fill_(0.5)
tx6.fill_(0.5)
ty6.fill_(0.5)
tx7.fill_(0.5)
ty7.fill_(0.5)
tx8.fill_(0.5)
ty8.fill_(0.5)
coord_mask.fill_(1)
nGT = 0
nCorrect = 0
for b in xrange(nB):
for t in xrange(50):
if target[b][t*21+1] == 0:
break
nGT = nGT + 1
best_iou = 0.0
best_n = -1
min_dist = 10000
gx0 = target[b][t*21+1] * nW
gy0 = target[b][t*21+2] * nH
gi0 = int(gx0)
gj0 = int(gy0)
gx1 = target[b][t*21+3] * nW
gy1 = target[b][t*21+4] * nH
gx2 = target[b][t*21+5] * nW
gy2 = target[b][t*21+6] * nH
gx3 = target[b][t*21+7] * nW
gy3 = target[b][t*21+8] * nH
gx4 = target[b][t*21+9] * nW
gy4 = target[b][t*21+10] * nH
gx5 = target[b][t*21+11] * nW
gy5 = target[b][t*21+12] * nH
gx6 = target[b][t*21+13] * nW
gy6 = target[b][t*21+14] * nH
gx7 = target[b][t*21+15] * nW
gy7 = target[b][t*21+16] * nH
gx8 = target[b][t*21+17] * nW
gy8 = target[b][t*21+18] * nH
gw = target[b][t*21+19]*nW
gh = target[b][t*21+20]*nH
gt_box = [0, 0, gw, gh]
for n in xrange(nA):
aw = anchors[anchor_step*n]
ah = anchors[anchor_step*n+1]
anchor_box = [0, 0, aw, ah]
iou = bbox_iou(anchor_box, gt_box, x1y1x2y2=False)
if iou > best_iou:
best_iou = iou
best_n = n
gt_box = [gx0/nW,gy0/nH,gx1/nW,gy1/nH,gx2/nW,gy2/nH,gx3/nW,gy3/nH,gx4/nW,gy4/nH,gx5/nW,gy5/nH,gx6/nW,gy6/nH,gx7/nW,gy7/nH,gx8/nW,gy8/nH]
pred_box = pred_corners[b*nAnchors+best_n*nPixels+gj0*nW+gi0]
conf = corner_confidence9(gt_box, pred_box)
coord_mask[b][best_n][gj0][gi0] = 1
cls_mask[b][best_n][gj0][gi0] = 1
conf_mask[b][best_n][gj0][gi0] = object_scale
tx0[b][best_n][gj0][gi0] = target[b][t*21+1] * nW - gi0
ty0[b][best_n][gj0][gi0] = target[b][t*21+2] * nH - gj0
tx1[b][best_n][gj0][gi0] = target[b][t*21+3] * nW - gi0
ty1[b][best_n][gj0][gi0] = target[b][t*21+4] * nH - gj0
tx2[b][best_n][gj0][gi0] = target[b][t*21+5] * nW - gi0
ty2[b][best_n][gj0][gi0] = target[b][t*21+6] * nH - gj0
tx3[b][best_n][gj0][gi0] = target[b][t*21+7] * nW - gi0
ty3[b][best_n][gj0][gi0] = target[b][t*21+8] * nH - gj0
tx4[b][best_n][gj0][gi0] = target[b][t*21+9] * nW - gi0
ty4[b][best_n][gj0][gi0] = target[b][t*21+10] * nH - gj0
tx5[b][best_n][gj0][gi0] = target[b][t*21+11] * nW - gi0
ty5[b][best_n][gj0][gi0] = target[b][t*21+12] * nH - gj0
tx6[b][best_n][gj0][gi0] = target[b][t*21+13] * nW - gi0
ty6[b][best_n][gj0][gi0] = target[b][t*21+14] * nH - gj0
tx7[b][best_n][gj0][gi0] = target[b][t*21+15] * nW - gi0
ty7[b][best_n][gj0][gi0] = target[b][t*21+16] * nH - gj0
tx8[b][best_n][gj0][gi0] = target[b][t*21+17] * nW - gi0
ty8[b][best_n][gj0][gi0] = target[b][t*21+18] * nH - gj0
tconf[b][best_n][gj0][gi0] = conf
tcls[b][best_n][gj0][gi0] = target[b][t*21]
if conf > 0.5:
nCorrect = nCorrect + 1
return nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx0, tx1, tx2, tx3, tx4, tx5, tx6, tx7, tx8, ty0, ty1, ty2, ty3, ty4, ty5, ty6, ty7, ty8, tconf, tcls
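# Target layout assumed throughout build_targets(): each image row packs up
# to 50 objects at 21 floats apiece,
#     [class, x0, y0, x1, y1, ..., x8, y8, w, h]
# where (x0, y0) appears to be the object center of the 9-point encoding,
# (x1..x8, y1..y8) the eight 3D bounding-box corners projected to 2D, and
# (w, h) the 2D extent that fill_truth_detection() derives from the corners
# for anchor matching only.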
class RegionLoss(nn.Module):
def __init__(self, num_classes=0, anchors=[], num_anchors=5):
super(RegionLoss, self).__init__()
self.num_classes = num_classes
self.anchors = anchors
self.num_anchors = num_anchors
self.anchor_step = len(anchors)/num_anchors
self.coord_scale = 1
self.noobject_scale = 1
self.object_scale = 5
self.class_scale = 1
self.thresh = 0.6
self.seen = 0
def forward(self, output, target):
# Parameters
t0 = time.time()
nB = output.data.size(0)
nA = self.num_anchors
nC = self.num_classes
nH = output.data.size(2)
nW = output.data.size(3)
# Activation
output = output.view(nB, nA, (19+nC), nH, nW)
x0 = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))
y0 = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))
x1 = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)
y1 = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)
x2 = output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW)
y2 = output.index_select(2, Variable(torch.cuda.LongTensor([5]))).view(nB, nA, nH, nW)
x3 = output.index_select(2, Variable(torch.cuda.LongTensor([6]))).view(nB, nA, nH, nW)
y3 = output.index_select(2, Variable(torch.cuda.LongTensor([7]))).view(nB, nA, nH, nW)
x4 = output.index_select(2, Variable(torch.cuda.LongTensor([8]))).view(nB, nA, nH, nW)
y4 = output.index_select(2, Variable(torch.cuda.LongTensor([9]))).view(nB, nA, nH, nW)
x5 = output.index_select(2, Variable(torch.cuda.LongTensor([10]))).view(nB, nA, nH, nW)
y5 = output.index_select(2, Variable(torch.cuda.LongTensor([11]))).view(nB, nA, nH, nW)
x6 = output.index_select(2, Variable(torch.cuda.LongTensor([12]))).view(nB, nA, nH, nW)
y6 = output.index_select(2, Variable(torch.cuda.LongTensor([13]))).view(nB, nA, nH, nW)
x7 = output.index_select(2, Variable(torch.cuda.LongTensor([14]))).view(nB, nA, nH, nW)
y7 = output.index_select(2, Variable(torch.cuda.LongTensor([15]))).view(nB, nA, nH, nW)
x8 = output.index_select(2, Variable(torch.cuda.LongTensor([16]))).view(nB, nA, nH, nW)
y8 = output.index_select(2, Variable(torch.cuda.LongTensor([17]))).view(nB, nA, nH, nW)
conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([18]))).view(nB, nA, nH, nW))
cls = output.index_select(2, Variable(torch.linspace(19,19+nC-1,nC).long().cuda()))
cls = cls.view(nB*nA, nC, nH*nW).transpose(1,2).contiguous().view(nB*nA*nH*nW, nC)
t1 = time.time()
# Create pred boxes
pred_corners = torch.cuda.FloatTensor(18, nB*nA*nH*nW)
grid_x = torch.linspace(0, nW-1, nW).repeat(nH,1).repeat(nB*nA, 1, 1).view(nB*nA*nH*nW).cuda()
grid_y = torch.linspace(0, nH-1, nH).repeat(nW,1).t().repeat(nB*nA, 1, 1).view(nB*nA*nH*nW).cuda()
pred_corners[0] = (x0.data + grid_x) / nW
pred_corners[1] = (y0.data + grid_y) / nH
pred_corners[2] = (x1.data + grid_x) / nW
pred_corners[3] = (y1.data + grid_y) / nH
pred_corners[4] = (x2.data + grid_x) / nW
pred_corners[5] = (y2.data + grid_y) / nH
pred_corners[6] = (x3.data + grid_x) / nW
pred_corners[7] = (y3.data + grid_y) / nH
pred_corners[8] = (x4.data + grid_x) / nW
pred_corners[9] = (y4.data + grid_y) / nH
pred_corners[10] = (x5.data + grid_x) / nW
pred_corners[11] = (y5.data + grid_y) / nH
pred_corners[12] = (x6.data + grid_x) / nW
pred_corners[13] = (y6.data + grid_y) / nH
pred_corners[14] = (x7.data + grid_x) / nW
pred_corners[15] = (y7.data + grid_y) / nH
pred_corners[16] = (x8.data + grid_x) / nW
pred_corners[17] = (y8.data + grid_y) / nH
gpu_matrix = pred_corners.transpose(0,1).contiguous().view(-1,18)
pred_corners = convert2cpu(gpu_matrix)
t2 = time.time()
# Build targets
nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx0, tx1, tx2, tx3, tx4, tx5, tx6, tx7, tx8, ty0, ty1, ty2, ty3, ty4, ty5, ty6, ty7, ty8, tconf, tcls = \
build_targets(pred_corners, target.data, self.anchors, nA, nC, nH, nW, self.noobject_scale, self.object_scale, self.thresh, self.seen)
cls_mask = (cls_mask == 1)
nProposals = int((conf > 0.25).sum().data[0])
tx0 = Variable(tx0.cuda())
ty0 = Variable(ty0.cuda())
tx1 = Variable(tx1.cuda())
ty1 = Variable(ty1.cuda())
tx2 = Variable(tx2.cuda())
ty2 = Variable(ty2.cuda())
tx3 = Variable(tx3.cuda())
ty3 = Variable(ty3.cuda())
tx4 = Variable(tx4.cuda())
ty4 = Variable(ty4.cuda())
tx5 = Variable(tx5.cuda())
ty5 = Variable(ty5.cuda())
tx6 = Variable(tx6.cuda())
ty6 = Variable(ty6.cuda())
tx7 = Variable(tx7.cuda())
ty7 = Variable(ty7.cuda())
tx8 = Variable(tx8.cuda())
ty8 = Variable(ty8.cuda())
tconf = Variable(tconf.cuda())
tcls = Variable(tcls.view(-1)[cls_mask].long().cuda())
coord_mask = Variable(coord_mask.cuda())
conf_mask = Variable(conf_mask.cuda().sqrt())
cls_mask = Variable(cls_mask.view(-1, 1).repeat(1,nC).cuda())
cls = cls[cls_mask].view(-1, nC)
t3 = time.time()
# Create loss
loss_x0 = self.coord_scale * nn.MSELoss(size_average=False)(x0*coord_mask, tx0*coord_mask)/2.0
loss_y0 = self.coord_scale * nn.MSELoss(size_average=False)(y0*coord_mask, ty0*coord_mask)/2.0
loss_x1 = self.coord_scale * nn.MSELoss(size_average=False)(x1*coord_mask, tx1*coord_mask)/2.0
loss_y1 = self.coord_scale * nn.MSELoss(size_average=False)(y1*coord_mask, ty1*coord_mask)/2.0
loss_x2 = self.coord_scale * nn.MSELoss(size_average=False)(x2*coord_mask, tx2*coord_mask)/2.0
loss_y2 = self.coord_scale * nn.MSELoss(size_average=False)(y2*coord_mask, ty2*coord_mask)/2.0
loss_x3 = self.coord_scale * nn.MSELoss(size_average=False)(x3*coord_mask, tx3*coord_mask)/2.0
loss_y3 = self.coord_scale * nn.MSELoss(size_average=False)(y3*coord_mask, ty3*coord_mask)/2.0
loss_x4 = self.coord_scale * nn.MSELoss(size_average=False)(x4*coord_mask, tx4*coord_mask)/2.0
loss_y4 = self.coord_scale * nn.MSELoss(size_average=False)(y4*coord_mask, ty4*coord_mask)/2.0
loss_x5 = self.coord_scale * nn.MSELoss(size_average=False)(x5*coord_mask, tx5*coord_mask)/2.0
loss_y5 = self.coord_scale * nn.MSELoss(size_average=False)(y5*coord_mask, ty5*coord_mask)/2.0
loss_x6 = self.coord_scale * nn.MSELoss(size_average=False)(x6*coord_mask, tx6*coord_mask)/2.0
loss_y6 = self.coord_scale * nn.MSELoss(size_average=False)(y6*coord_mask, ty6*coord_mask)/2.0
loss_x7 = self.coord_scale * nn.MSELoss(size_average=False)(x7*coord_mask, tx7*coord_mask)/2.0
loss_y7 = self.coord_scale * nn.MSELoss(size_average=False)(y7*coord_mask, ty7*coord_mask)/2.0
loss_x8 = self.coord_scale * nn.MSELoss(size_average=False)(x8*coord_mask, tx8*coord_mask)/2.0
loss_y8 = self.coord_scale * nn.MSELoss(size_average=False)(y8*coord_mask, ty8*coord_mask)/2.0
loss_conf = nn.MSELoss(size_average=False)(conf*conf_mask, tconf*conf_mask)/2.0
loss_x = loss_x0 + loss_x1 + loss_x2 + loss_x3 + loss_x4 + loss_x5 + loss_x6 + loss_x7 + loss_x8
loss_y = loss_y0 + loss_y1 + loss_y2 + loss_y3 + loss_y4 + loss_y5 + loss_y6 + loss_y7 + loss_y8
loss_cls = self.class_scale * nn.CrossEntropyLoss(size_average=False)(cls, tcls)
loss = loss_x + loss_y + loss_conf + loss_cls
print('%d: nGT %d, recall %d, proposals %d, loss: x0: %f x %f, y0: %f y %f, conf %f, cls %f, total %f' % (self.seen, nGT, nCorrect, nProposals, loss_x0.data[0], loss_x.data[0], loss_y0.data[0], loss_y.data[0], loss_conf.data[0], loss_cls.data[0], loss.data[0]))
#else:
# loss = loss_x + loss_y + loss_conf
# print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, conf %f, total %f' % (self.seen, nGT, nCorrect, nProposals, loss_x.data[0], loss_y.data[0], loss_conf.data[0], loss.data[0]))
t4 = time.time()
if False:
print('-----------------------------------')
print(' activation : %f' % (t1 - t0))
print(' create pred_corners : %f' % (t2 - t1))
print(' build targets : %f' % (t3 - t2))
print(' create loss : %f' % (t4 - t3))
print(' total : %f' % (t4 - t0))
return loss
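# Shape sketch for the forward pass above (anchor values taken from the
# notebook further below; CUDA is required by the hard-coded .cuda() calls):
# with nC=13 classes and nA=5 anchors, `output` is the network map of shape
# (nB, 5*(19+13), nH, nW) and `target` a (nB, 50*21) tensor.
# loss_fn = RegionLoss(num_classes=13, anchors=[1.4820, 2.2412, 2.0501,
#     3.1265, 2.3946, 4.6891, 3.1018, 3.9910, 3.4879, 5.8851], num_anchors=5)
# loss = loss_fn(output, target)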


@@ -0,0 +1,424 @@
from __future__ import print_function
import os
os.sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import sys
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
import numpy as np
import random
import math
import shutil
from torchvision import datasets, transforms
from torch.autograd import Variable # Useful info about autograd: http://pytorch.org/docs/master/notes/autograd.html
from darknet_multi import Darknet
from MeshPly import MeshPly
from utils import *
from cfg import parse_cfg
import dataset_multi
from region_loss_multi import RegionLoss
# Create new directory
def makedirs(path):
if not os.path.exists( path ):
os.makedirs( path )
# Adjust learning rate during training, learning schedule can be changed in network config file
def adjust_learning_rate(optimizer, batch):
lr = learning_rate
for i in range(len(steps)):
scale = scales[i] if i < len(scales) else 1
if batch >= steps[i]:
lr = lr * scale
if batch == steps[i]:
break
else:
break
for param_group in optimizer.param_groups:
param_group['lr'] = lr/batch_size
return lr
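# Worked example (step/scale values illustrative, the real ones come from the
# network .cfg): with steps = [-1, 100, 20000] and scales = [0.1, 10, 0.1],
# the loop multiplies lr by 0.1 as soon as batch >= -1, restores it to the
# base rate (0.1 * 10) once batch >= 100, and decays it 10x from batch 20000
# on; the result is further divided by batch_size before being written into
# the optimizer's param groups.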
def train(epoch):
global processed_batches
# Initialize timer
t0 = time.time()
# Get the dataloader for training dataset
train_loader = torch.utils.data.DataLoader(dataset_multi.listDataset(trainlist, shape=(init_width, init_height),
shuffle=True,
transform=transforms.Compose([transforms.ToTensor(),]),
train=True,
seen=model.module.seen,
batch_size=batch_size,
num_workers=num_workers, bg_file_names=bg_file_names),
batch_size=batch_size, shuffle=False, **kwargs)
# TRAINING
lr = adjust_learning_rate(optimizer, processed_batches)
logging('epoch %d, processed %d samples, lr %f' % (epoch, epoch * len(train_loader.dataset), lr))
# Start training
model.train()
t1 = time.time()
avg_time = torch.zeros(9)
niter = 0
# Iterate through batches
for batch_idx, (data, target) in enumerate(train_loader):
t2 = time.time()
# adjust learning rate
adjust_learning_rate(optimizer, processed_batches)
processed_batches = processed_batches + 1
# Pass the data to GPU
if use_cuda:
data = data.cuda()
t3 = time.time()
# Wrap tensors in Variable class for automatic differentiation
data, target = Variable(data), Variable(target)
t4 = time.time()
# Zero the gradients before running the backward pass
optimizer.zero_grad()
t5 = time.time()
# Forward pass
output = model(data)
t6 = time.time()
region_loss.seen = region_loss.seen + data.data.size(0)
# Compute loss, grow an array of losses for saving later on
loss = region_loss(output, target)
training_iters.append(epoch * math.ceil(len(train_loader.dataset) / float(batch_size) ) + niter)
training_losses.append(convert2cpu(loss.data))
niter += 1
t7 = time.time()
# Backprop: compute gradient of the loss with respect to model parameters
loss.backward()
t8 = time.time()
# Update weights
optimizer.step()
t9 = time.time()
# Print time statistics
if False and batch_idx > 1:
avg_time[0] = avg_time[0] + (t2-t1)
avg_time[1] = avg_time[1] + (t3-t2)
avg_time[2] = avg_time[2] + (t4-t3)
avg_time[3] = avg_time[3] + (t5-t4)
avg_time[4] = avg_time[4] + (t6-t5)
avg_time[5] = avg_time[5] + (t7-t6)
avg_time[6] = avg_time[6] + (t8-t7)
avg_time[7] = avg_time[7] + (t9-t8)
avg_time[8] = avg_time[8] + (t9-t1)
print('-------------------------------')
print(' load data : %f' % (avg_time[0]/(batch_idx)))
print(' cpu to cuda : %f' % (avg_time[1]/(batch_idx)))
print('cuda to variable : %f' % (avg_time[2]/(batch_idx)))
print(' zero_grad : %f' % (avg_time[3]/(batch_idx)))
print(' forward feature : %f' % (avg_time[4]/(batch_idx)))
print(' forward loss : %f' % (avg_time[5]/(batch_idx)))
print(' backward : %f' % (avg_time[6]/(batch_idx)))
print(' step : %f' % (avg_time[7]/(batch_idx)))
print(' total : %f' % (avg_time[8]/(batch_idx)))
t1 = time.time()
t1 = time.time()
return epoch * math.ceil(len(train_loader.dataset) / float(batch_size) ) + niter - 1
def eval(niter, datacfg, cfgfile):
def truths_length(truths):
for i in range(50):
if truths[i][1] == 0:
return i
# Parse configuration files
options = read_data_cfg(datacfg)
valid_images = options['valid']
meshname = options['mesh']
backupdir = options['backup']
name = options['name']
prefix = 'results'
# Read object model information, get 3D bounding box corners
mesh = MeshPly(meshname)
vertices = np.c_[np.array(mesh.vertices), np.ones((len(mesh.vertices), 1))].transpose()
corners3D = get_3D_corners(vertices)
# Read intrinsic camera parameters
internal_calibration = get_camera_intrinsic()
# Get validation file names
with open(valid_images) as fp:
tmp_files = fp.readlines()
valid_files = [item.rstrip() for item in tmp_files]
# Specify model, load pretrained weights, pass to GPU and set the module in evaluation mode
model.eval()
# Get the parser for the test dataset
valid_dataset = dataset_multi.listDataset(valid_images, shape=(model.module.width, model.module.height),
shuffle=False,
objclass=name,
transform=transforms.Compose([
transforms.ToTensor(),
]))
valid_batchsize = 1
# Specify the number of workers for multiple processing, get the dataloader for the test dataset
kwargs = {'num_workers': 4, 'pin_memory': True}
test_loader = torch.utils.data.DataLoader(
valid_dataset, batch_size=valid_batchsize, shuffle=False, **kwargs)
# Parameters
num_classes = model.module.num_classes
anchors = model.module.anchors
num_anchors = model.module.num_anchors
testing_error_pixel = 0.0
testing_samples = 0.0
errs_2d = []
logging(" Number of test samples: %d" % len(test_loader.dataset))
# Iterate through test examples
for batch_idx, (data, target) in enumerate(test_loader):
t1 = time.time()
# Pass the data to GPU
if use_cuda:
data = data.cuda()
target = target.cuda()
# Wrap tensors in Variable class, set volatile=True for inference mode and to use minimal memory during inference
data = Variable(data, volatile=True)
t2 = time.time()
# Forward pass
output = model(data).data
t3 = time.time()
# Using confidence threshold, eliminate low-confidence predictions
trgt = target[0].view(-1, 21)
all_boxes = get_corresponding_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors, int(trgt[0][0]), only_objectness=0)
t4 = time.time()
# Iterate through all batch elements
for i in range(output.size(0)):
# For each image, get all the predictions
boxes = all_boxes[i]
# For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)
truths = target[i].view(-1, 21)
# Get how many objects are present in the scene
num_gts = truths_length(truths)
# Iterate through each ground-truth object
for k in range(num_gts):
box_gt = [truths[k][1], truths[k][2], truths[k][3], truths[k][4], truths[k][5], truths[k][6],
truths[k][7], truths[k][8], truths[k][9], truths[k][10], truths[k][11], truths[k][12],
truths[k][13], truths[k][14], truths[k][15], truths[k][16], truths[k][17], truths[k][18], 1.0, 1.0, truths[k][0]]
best_conf_est = -1
# If the prediction has the highest confidence, choose it as our prediction
for j in range(len(boxes)):
if (boxes[j][18] > best_conf_est) and (boxes[j][20] == int(truths[k][0])):
best_conf_est = boxes[j][18]
box_pr = boxes[j]
bb2d_gt = get_2d_bb(box_gt[:18], output.size(3))
bb2d_pr = get_2d_bb(box_pr[:18], output.size(3))
iou = bbox_iou(bb2d_gt, bb2d_pr)
match = corner_confidence9(box_gt[:18], torch.FloatTensor(boxes[j][:18]))
# Denormalize the corner predictions
corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]), dtype='float32')
corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')
corners2D_gt[:, 0] = corners2D_gt[:, 0] * im_width
corners2D_gt[:, 1] = corners2D_gt[:, 1] * im_height
corners2D_pr[:, 0] = corners2D_pr[:, 0] * im_width
corners2D_pr[:, 1] = corners2D_pr[:, 1] * im_height
corners2D_gt_corrected = fix_corner_order(corners2D_gt) # Fix the order of the corners in OCCLUSION
# Compute [R|t] by pnp
objpoints3D = np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32')
K = np.array(internal_calibration, dtype='float32')
R_gt, t_gt = pnp(objpoints3D, corners2D_gt_corrected, K)
R_pr, t_pr = pnp(objpoints3D, corners2D_pr, K)
# Compute pixel error
Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
proj_2d_gt = compute_projection(vertices, Rt_gt, internal_calibration)
proj_2d_pred = compute_projection(vertices, Rt_pr, internal_calibration)
proj_corners_gt = np.transpose(compute_projection(corners3D, Rt_gt, internal_calibration))
proj_corners_pr = np.transpose(compute_projection(corners3D, Rt_pr, internal_calibration))
norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
pixel_dist = np.mean(norm)
errs_2d.append(pixel_dist)
# Sum errors
testing_error_pixel += pixel_dist
testing_samples += 1
t5 = time.time()
# Compute 2D reprojection score
for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:
acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d)+eps)
logging(' Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))
if True:
logging('-----------------------------------')
logging(' tensor to cuda : %f' % (t2 - t1))
logging(' predict : %f' % (t3 - t2))
logging('get_region_boxes : %f' % (t4 - t3))
logging(' eval : %f' % (t5 - t4))
logging(' total : %f' % (t5 - t1))
logging('-----------------------------------')
# Register losses and errors for saving later on
testing_iters.append(niter)
testing_errors_pixel.append(testing_error_pixel/(float(testing_samples)+eps))
testing_accuracies.append(acc)
def test(niter):
cfgfile = 'cfg/yolo-pose-multi.cfg'
datacfg = 'cfg/ape_occlusion.data'
logging("Testing ape...")
eval(niter, datacfg, cfgfile)
datacfg = 'cfg/can_occlusion.data'
logging("Testing can...")
eval(niter, datacfg, cfgfile)
datacfg = 'cfg/cat_occlusion.data'
logging("Testing cat...")
eval(niter, datacfg, cfgfile)
datacfg = 'cfg/duck_occlusion.data'
logging("Testing duck...")
eval(niter, datacfg, cfgfile)
datacfg = 'cfg/driller_occlusion.data'
logging("Testing driller...")
eval(niter, datacfg, cfgfile)
datacfg = 'cfg/glue_occlusion.data'
logging("Testing glue...")
eval(niter, datacfg, cfgfile)
# datacfg = 'cfg/holepuncher_occlusion.data'
# logging("Testing holepuncher...")
# eval(niter, datacfg, cfgfile)
if __name__ == "__main__":
# Training settings
datacfg = sys.argv[1]
cfgfile = sys.argv[2]
weightfile = sys.argv[3]
# Parse configuration files
data_options = read_data_cfg(datacfg)
net_options = parse_cfg(cfgfile)[0]
trainlist = data_options['train']
nsamples = file_lines(trainlist)
gpus = data_options['gpus'] # e.g. 0,1,2,3
gpus = '0' # hard-coded override of the value read from the data config above
num_workers = int(data_options['num_workers'])
backupdir = data_options['backup']
if not os.path.exists(backupdir):
makedirs(backupdir)
batch_size = int(net_options['batch'])
max_batches = int(net_options['max_batches'])
learning_rate = float(net_options['learning_rate'])
momentum = float(net_options['momentum'])
decay = float(net_options['decay'])
steps = [float(step) for step in net_options['steps'].split(',')]
scales = [float(scale) for scale in net_options['scales'].split(',')]
bg_file_names = get_all_files('../VOCdevkit/VOC2012/JPEGImages')
# Train parameters
max_epochs = 700 # max_batches*batch_size/nsamples+1
use_cuda = True
seed = int(time.time())
eps = 1e-5
save_interval = 10 # epoches
dot_interval = 70 # batches
best_acc = -1
# Test parameters
conf_thresh = 0.05
nms_thresh = 0.4
match_thresh = 0.5
iou_thresh = 0.5
im_width = 640
im_height = 480
# Specify which gpus to use
torch.manual_seed(seed)
if use_cuda:
os.environ['CUDA_VISIBLE_DEVICES'] = gpus
torch.cuda.manual_seed(seed)
# Specifiy the model and the loss
model = Darknet(cfgfile)
region_loss = model.loss
# Model settings
# model.load_weights(weightfile)
model.load_weights_until_last(weightfile)
model.print_network()
model.seen = 0
region_loss.iter = model.iter
region_loss.seen = model.seen
processed_batches = model.seen/batch_size
init_width = model.width
init_height = model.height
init_epoch = model.seen/nsamples
# Variable to save
training_iters = []
training_losses = []
testing_iters = []
testing_errors_pixel = []
testing_accuracies = []
# Specify the number of workers
kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}
# Pass the model to GPU
if use_cuda:
# model = model.cuda()
model = torch.nn.DataParallel(model, device_ids=[0]).cuda() # Multiple GPU parallelism
# Get the optimizer
params_dict = dict(model.named_parameters())
params = []
for key, value in params_dict.items():
if key.find('.bn') >= 0 or key.find('.bias') >= 0:
params += [{'params': [value], 'weight_decay': 0.0}]
else:
params += [{'params': [value], 'weight_decay': decay*batch_size}]
# Note: the per-parameter `params` groups built above are not actually passed in here
optimizer = optim.SGD(model.parameters(), lr=learning_rate/batch_size, momentum=momentum, dampening=0, weight_decay=decay*batch_size)
# optimizer = optim.Adam(model.parameters(), lr=0.001) # Adam optimization
evaluate = False
if evaluate:
logging('evaluating ...')
test(0)
else:
for epoch in range(init_epoch, max_epochs):
# TRAIN
niter = train(epoch)
# TEST and SAVE
if (epoch % 20 == 0) and (epoch != 0):
test(niter)
logging('save training stats to %s/costs.npz' % (backupdir))
np.savez(os.path.join(backupdir, "costs.npz"),
training_iters=training_iters,
training_losses=training_losses,
testing_iters=testing_iters,
testing_accuracies=testing_accuracies,
testing_errors_pixel=testing_errors_pixel)
if (np.mean(testing_accuracies[-5:]) > best_acc ):
best_acc = np.mean(testing_accuracies[-5:])
logging('best model so far!')
logging('save weights to %s/model.weights' % (backupdir))
model.module.save_weights('%s/model.weights' % (backupdir))
shutil.copy2('%s/model.weights' % (backupdir), '%s/model_backup.weights' % (backupdir))
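# Invocation sketch (data/config names as used by test() above; the weight
# path is a placeholder; the argv order is fixed at the top of __main__):
#   python train_multi.py cfg/ape_occlusion.data cfg/yolo-pose-multi.cfg backup_multi/init.weights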


@@ -0,0 +1,343 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import os\n",
"os.sys.path.append('..')\n",
"os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"1\"\n",
"import torch\n",
"from torch.autograd import Variable\n",
"from torchvision import datasets, transforms\n",
"from scipy.misc import imsave\n",
"import scipy.io\n",
"import warnings\n",
"import sys\n",
"warnings.filterwarnings(\"ignore\")\n",
"import matplotlib.pyplot as plt\n",
"import scipy.misc\n",
"\n",
"from darknet_multi import Darknet\n",
"from utils import *\n",
"import dataset_multi\n",
"from MeshPly import MeshPly"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2018-05-06 14:09:50 Testing ape...\n",
"2018-05-06 14:10:15 Acc using 5 px 2D Projection = 7.01%\n",
"2018-05-06 14:10:15 Acc using 10 px 2D Projection = 40.43%\n",
"2018-05-06 14:10:15 Acc using 15 px 2D Projection = 59.83%\n",
"2018-05-06 14:10:15 Acc using 20 px 2D Projection = 68.55%\n",
"2018-05-06 14:10:15 Acc using 25 px 2D Projection = 72.05%\n",
"2018-05-06 14:10:15 Acc using 30 px 2D Projection = 73.68%\n",
"2018-05-06 14:10:15 Acc using 35 px 2D Projection = 74.53%\n",
"2018-05-06 14:10:15 Acc using 40 px 2D Projection = 75.13%\n",
"2018-05-06 14:10:15 Acc using 45 px 2D Projection = 75.73%\n",
"2018-05-06 14:10:15 Acc using 50 px 2D Projection = 76.50%\n",
"2018-05-06 14:10:18 Testing can...\n",
"2018-05-06 14:10:47 Acc using 5 px 2D Projection = 11.18%\n",
"2018-05-06 14:10:47 Acc using 10 px 2D Projection = 57.83%\n",
"2018-05-06 14:10:47 Acc using 15 px 2D Projection = 79.95%\n",
"2018-05-06 14:10:47 Acc using 20 px 2D Projection = 85.75%\n",
"2018-05-06 14:10:47 Acc using 25 px 2D Projection = 88.73%\n",
"2018-05-06 14:10:47 Acc using 30 px 2D Projection = 90.39%\n",
"2018-05-06 14:10:47 Acc using 35 px 2D Projection = 91.80%\n",
"2018-05-06 14:10:47 Acc using 40 px 2D Projection = 93.21%\n",
"2018-05-06 14:10:47 Acc using 45 px 2D Projection = 93.62%\n",
"2018-05-06 14:10:47 Acc using 50 px 2D Projection = 93.79%\n",
"2018-05-06 14:10:50 Testing cat...\n",
"2018-05-06 14:11:16 Acc using 5 px 2D Projection = 3.62%\n",
"2018-05-06 14:11:16 Acc using 10 px 2D Projection = 23.25%\n",
"2018-05-06 14:11:16 Acc using 15 px 2D Projection = 39.51%\n",
"2018-05-06 14:11:16 Acc using 20 px 2D Projection = 49.45%\n",
"2018-05-06 14:11:16 Acc using 25 px 2D Projection = 54.76%\n",
"2018-05-06 14:11:16 Acc using 30 px 2D Projection = 57.96%\n",
"2018-05-06 14:11:16 Acc using 35 px 2D Projection = 59.56%\n",
"2018-05-06 14:11:16 Acc using 40 px 2D Projection = 60.99%\n",
"2018-05-06 14:11:16 Acc using 45 px 2D Projection = 62.51%\n",
"2018-05-06 14:11:16 Acc using 50 px 2D Projection = 63.27%\n",
"2018-05-06 14:11:19 Testing duck...\n",
"2018-05-06 14:11:42 Acc using 5 px 2D Projection = 5.07%\n",
"2018-05-06 14:11:42 Acc using 10 px 2D Projection = 18.20%\n",
"2018-05-06 14:11:42 Acc using 15 px 2D Projection = 30.88%\n",
"2018-05-06 14:11:42 Acc using 20 px 2D Projection = 55.12%\n",
"2018-05-06 14:11:42 Acc using 25 px 2D Projection = 75.15%\n",
"2018-05-06 14:11:42 Acc using 30 px 2D Projection = 81.45%\n",
"2018-05-06 14:11:42 Acc using 35 px 2D Projection = 83.20%\n",
"2018-05-06 14:11:42 Acc using 40 px 2D Projection = 83.64%\n",
"2018-05-06 14:11:42 Acc using 45 px 2D Projection = 83.90%\n",
"2018-05-06 14:11:42 Acc using 50 px 2D Projection = 84.16%\n",
"2018-05-06 14:11:45 Testing driller...\n",
"2018-05-06 14:12:10 Acc using 5 px 2D Projection = 1.40%\n",
"2018-05-06 14:12:10 Acc using 10 px 2D Projection = 17.38%\n",
"2018-05-06 14:12:10 Acc using 15 px 2D Projection = 39.87%\n",
"2018-05-06 14:12:10 Acc using 20 px 2D Projection = 62.93%\n",
"2018-05-06 14:12:10 Acc using 25 px 2D Projection = 80.64%\n",
"2018-05-06 14:12:10 Acc using 30 px 2D Projection = 89.87%\n",
"2018-05-06 14:12:10 Acc using 35 px 2D Projection = 94.89%\n",
"2018-05-06 14:12:10 Acc using 40 px 2D Projection = 95.88%\n",
"2018-05-06 14:12:10 Acc using 45 px 2D Projection = 96.54%\n",
"2018-05-06 14:12:10 Acc using 50 px 2D Projection = 96.87%\n",
"2018-05-06 14:12:13 Testing glue...\n",
"2018-05-06 14:12:31 Acc using 5 px 2D Projection = 6.53%\n",
"2018-05-06 14:12:31 Acc using 10 px 2D Projection = 26.91%\n",
"2018-05-06 14:12:31 Acc using 15 px 2D Projection = 39.65%\n",
"2018-05-06 14:12:31 Acc using 20 px 2D Projection = 46.18%\n",
"2018-05-06 14:12:31 Acc using 25 px 2D Projection = 49.50%\n",
"2018-05-06 14:12:31 Acc using 30 px 2D Projection = 51.83%\n",
"2018-05-06 14:12:31 Acc using 35 px 2D Projection = 53.05%\n",
"2018-05-06 14:12:31 Acc using 40 px 2D Projection = 53.16%\n",
"2018-05-06 14:12:31 Acc using 45 px 2D Projection = 53.93%\n",
"2018-05-06 14:12:31 Acc using 50 px 2D Projection = 54.71%\n",
"2018-05-06 14:12:45 Testing holepuncher...\n",
"2018-05-06 14:19:31 Acc using 5 px 2D Projection = 8.26%\n",
"2018-05-06 14:19:31 Acc using 10 px 2D Projection = 39.50%\n",
"2018-05-06 14:19:31 Acc using 15 px 2D Projection = 53.31%\n",
"2018-05-06 14:19:31 Acc using 20 px 2D Projection = 62.56%\n",
"2018-05-06 14:19:31 Acc using 25 px 2D Projection = 68.02%\n",
"2018-05-06 14:19:31 Acc using 30 px 2D Projection = 74.71%\n",
"2018-05-06 14:19:31 Acc using 35 px 2D Projection = 80.74%\n",
"2018-05-06 14:19:31 Acc using 40 px 2D Projection = 85.62%\n",
"2018-05-06 14:19:31 Acc using 45 px 2D Projection = 89.59%\n",
"2018-05-06 14:19:31 Acc using 50 px 2D Projection = 91.49%\n"
]
}
],
"source": [
"def valid(datacfg, cfgfile, weightfile, conf_th):\n",
" def truths_length(truths):\n",
" for i in range(50):\n",
" if truths[i][1] == 0:\n",
" return i\n",
"\n",
" # Parse configuration files\n",
" options = read_data_cfg(datacfg)\n",
" valid_images = options['valid']\n",
" meshname = options['mesh']\n",
" backupdir = options['backup']\n",
" name = options['name']\n",
" prefix = 'results'\n",
" # Read object model information, get 3D bounding box corners\n",
" mesh = MeshPly(meshname)\n",
" vertices = np.c_[np.array(mesh.vertices), np.ones((len(mesh.vertices), 1))].transpose()\n",
" corners3D = get_3D_corners(vertices)\n",
" # Read intrinsic camera parameters\n",
" internal_calibration = get_camera_intrinsic()\n",
"\n",
" # Get validation file names\n",
" with open(valid_images) as fp:\n",
" tmp_files = fp.readlines()\n",
" valid_files = [item.rstrip() for item in tmp_files]\n",
" \n",
" # Specicy model, load pretrained weights, pass to GPU and set the module in evaluation mode\n",
" model = Darknet(cfgfile)\n",
" model.load_weights(weightfile)\n",
" model.cuda()\n",
" model.eval()\n",
"\n",
" # Get the parser for the test dataset\n",
" valid_dataset = dataset_multi.listDataset(valid_images, shape=(model.width, model.height),\n",
" shuffle=False,\n",
" objclass=name,\n",
" transform=transforms.Compose([\n",
" transforms.ToTensor(),\n",
" ]))\n",
" valid_batchsize = 1\n",
"\n",
" # Specify the number of workers for multiple processing, get the dataloader for the test dataset\n",
" kwargs = {'num_workers': 4, 'pin_memory': True}\n",
" test_loader = torch.utils.data.DataLoader(\n",
" valid_dataset, batch_size=valid_batchsize, shuffle=False, **kwargs) \n",
"\n",
" # Parameters\n",
" visualize = False\n",
" use_cuda = True\n",
" num_classes = 13\n",
" anchors = [1.4820, 2.2412, 2.0501, 3.1265, 2.3946, 4.6891, 3.1018, 3.9910, 3.4879, 5.8851]\n",
" num_anchors = 5\n",
" eps = 1e-5\n",
" conf_thresh = conf_th\n",
" iou_thresh = 0.5\n",
"\n",
" # Parameters to save\n",
" errs_2d = []\n",
" edges = [[1, 2], [1, 3], [1, 5], [2, 4], [2, 6], [3, 4], [3, 7], [4, 8], [5, 6], [5, 7], [6, 8], [7, 8]]\n",
" edges_corners = [[0, 1], [0, 2], [0, 4], [1, 3], [1, 5], [2, 3], [2, 6], [3, 7], [4, 5], [4, 6], [5, 7], [6, 7]]\n",
"\n",
" # Iterate through test batches (Batch size for test data is 1)\n",
" count = 0\n",
" logging('Testing {}...'.format(name))\n",
" for batch_idx, (data, target) in enumerate(test_loader):\n",
" \n",
" # Images\n",
" img = data[0, :, :, :]\n",
" img = img.numpy().squeeze()\n",
" img = np.transpose(img, (1, 2, 0))\n",
" \n",
" t1 = time.time()\n",
" # Pass data to GPU\n",
" if use_cuda:\n",
" data = data.cuda()\n",
" target = target.cuda()\n",
" \n",
" # Wrap tensors in Variable class, set volatile=True for inference mode and to use minimal memory during inference\n",
" data = Variable(data, volatile=True)\n",
" t2 = time.time()\n",
" \n",
" # Forward pass\n",
" output = model(data).data \n",
" t3 = time.time()\n",
" \n",
" # Using confidence threshold, eliminate low-confidence predictions\n",
" trgt = target[0].view(-1, 21)\n",
" all_boxes = get_corresponding_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors, int(trgt[0][0]), only_objectness=0) \n",
" t4 = time.time()\n",
" \n",
" # Iterate through all images in the batch\n",
" for i in range(output.size(0)):\n",
" \n",
" # For each image, get all the predictions\n",
" boxes = all_boxes[i]\n",
" \n",
" # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)\n",
" truths = target[i].view(-1, 21)\n",
" \n",
" # Get how many object are present in the scene\n",
" num_gts = truths_length(truths)\n",
"\n",
" # Iterate through each ground-truth object\n",
" for k in range(num_gts):\n",
" box_gt = [truths[k][1], truths[k][2], truths[k][3], truths[k][4], truths[k][5], truths[k][6], \n",
" truths[k][7], truths[k][8], truths[k][9], truths[k][10], truths[k][11], truths[k][12], \n",
" truths[k][13], truths[k][14], truths[k][15], truths[k][16], truths[k][17], truths[k][18], 1.0, 1.0, truths[k][0]]\n",
" best_conf_est = -1\n",
" \n",
"\n",
" # If the prediction has the highest confidence, choose it as our prediction\n",
" for j in range(len(boxes)):\n",
" if (boxes[j][18] > best_conf_est) and (boxes[j][20] == int(truths[k][0])):\n",
" best_conf_est = boxes[j][18]\n",
" box_pr = boxes[j]\n",
" bb2d_gt = get_2d_bb(box_gt[:18], output.size(3))\n",
" bb2d_pr = get_2d_bb(box_pr[:18], output.size(3))\n",
" iou = bbox_iou(bb2d_gt, bb2d_pr)\n",
" match = corner_confidence9(box_gt[:18], torch.FloatTensor(boxes[j][:18]))\n",
" \n",
" # Denormalize the corner predictions \n",
" corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]), dtype='float32')\n",
" corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')\n",
" corners2D_gt[:, 0] = corners2D_gt[:, 0] * 640\n",
" corners2D_gt[:, 1] = corners2D_gt[:, 1] * 480 \n",
" corners2D_pr[:, 0] = corners2D_pr[:, 0] * 640\n",
" corners2D_pr[:, 1] = corners2D_pr[:, 1] * 480\n",
" corners2D_gt_corrected = fix_corner_order(corners2D_gt) # Fix the order of the corners in OCCLUSION\n",
" \n",
" # Compute [R|t] by pnp\n",
" objpoints3D = np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32')\n",
" K = np.array(internal_calibration, dtype='float32')\n",
" R_gt, t_gt = pnp(objpoints3D, corners2D_gt_corrected, K)\n",
" R_pr, t_pr = pnp(objpoints3D, corners2D_pr, K)\n",
" \n",
" # Compute pixel error\n",
" Rt_gt = np.concatenate((R_gt, t_gt), axis=1)\n",
" Rt_pr = np.concatenate((R_pr, t_pr), axis=1)\n",
" proj_2d_gt = compute_projection(vertices, Rt_gt, internal_calibration) \n",
" proj_2d_pred = compute_projection(vertices, Rt_pr, internal_calibration) \n",
" proj_corners_gt = np.transpose(compute_projection(corners3D, Rt_gt, internal_calibration)) \n",
" proj_corners_pr = np.transpose(compute_projection(corners3D, Rt_pr, internal_calibration)) \n",
" norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)\n",
" pixel_dist = np.mean(norm)\n",
" errs_2d.append(pixel_dist)\n",
"\n",
" \n",
" if visualize:\n",
" # Visualize\n",
" plt.xlim((0, 640))\n",
" plt.ylim((0, 480))\n",
" plt.imshow(scipy.misc.imresize(img, (480, 640)))\n",
" # Projections\n",
" for edge in edges_corners:\n",
" plt.plot(proj_corners_gt[edge, 0], proj_corners_gt[edge, 1], color='g', linewidth=3.0)\n",
" plt.plot(proj_corners_pr[edge, 0], proj_corners_pr[edge, 1], color='b', linewidth=3.0)\n",
" plt.gca().invert_yaxis()\n",
" plt.show()\n",
"\n",
" t5 = time.time()\n",
"\n",
" # Compute 2D projection score\n",
" for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:\n",
" acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d)+eps)\n",
" # Print test statistics\n",
" logging(' Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))\n",
"\n",
"conf_th = 0.05\n",
"cfgfile = 'cfg/yolo-pose-multi.cfg'\n",
"weightfile = 'backup_multi/model_backup2.weights'\n",
"datacfg = 'cfg/ape_occlusion.data'\n",
"valid(datacfg, cfgfile, weightfile, conf_th)\n",
"datacfg = 'cfg/can_occlusion.data'\n",
"valid(datacfg, cfgfile, weightfile, conf_th)\n",
"datacfg = 'cfg/cat_occlusion.data'\n",
"valid(datacfg, cfgfile, weightfile, conf_th)\n",
"datacfg = 'cfg/duck_occlusion.data'\n",
"valid(datacfg, cfgfile, weightfile, conf_th)\n",
"datacfg = 'cfg/driller_occlusion.data'\n",
"valid(datacfg, cfgfile, weightfile, conf_th)\n",
"datacfg = 'cfg/glue_occlusion.data'\n",
"valid(datacfg, cfgfile, weightfile, conf_th)\n",
"datacfg = 'cfg/holepuncher_occlusion.data'\n",
"valid(datacfg, cfgfile, weightfile, conf_th)\n",
"\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View file

@ -0,0 +1,183 @@
import os
os.sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import torch
import time  # explicit import for the time.time() calls below
from torch.autograd import Variable
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import scipy.misc
import warnings
warnings.filterwarnings("ignore")
from darknet_multi import Darknet
from utils import *
import dataset_multi
from MeshPly import MeshPly
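
# Validation of multi-object 6D pose estimation on the OCCLUSION dataset: for one object
# class at a time, pick the highest-confidence prediction of that class, recover the pose
# with PnP from the nine predicted corner points, and report 2D reprojection accuracy.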
def valid(datacfg, cfgfile, weightfile, conf_th):
    def truths_length(truths):
        for i in range(50):
            if truths[i][1] == 0:
                return i

    # Parse configuration files
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    meshname = options['mesh']
    name = options['name']
    prefix = 'results'

    # Read object model information, get 3D bounding box corners
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices), np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    diam = float(options['diam'])

    # Read intrinsic camera parameters
    internal_calibration = get_camera_intrinsic()

    # Get validation file names
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    # Specify model, load pretrained weights, pass to GPU and set the module in evaluation mode
    model = Darknet(cfgfile)
    model.load_weights(weightfile)
    model.cuda()
    model.eval()
    # Get the parser for the test dataset
    valid_dataset = dataset_multi.listDataset(valid_images, shape=(model.width, model.height),
                                              shuffle=False,
                                              objclass=name,
                                              transform=transforms.Compose([
                                                  transforms.ToTensor(),
                                              ]))
    valid_batchsize = 1

    # Specify the number of workers for multiprocessing, get the dataloader for the test dataset
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=valid_batchsize, shuffle=False, **kwargs)

    # Parameters
    use_cuda = True
    num_classes = 13
    anchors = [1.4820, 2.2412, 2.0501, 3.1265, 2.3946, 4.6891, 3.1018, 3.9910, 3.4879, 5.8851]
    num_anchors = 5
    eps = 1e-5
    conf_thresh = conf_th
    iou_thresh = 0.5

    # Parameters to save
    errs_2d = []
    edges = [[1, 2], [1, 3], [1, 5], [2, 4], [2, 6], [3, 4], [3, 7], [4, 8], [5, 6], [5, 7], [6, 8], [7, 8]]
    edges_corners = [[0, 1], [0, 2], [0, 4], [1, 3], [1, 5], [2, 3], [2, 6], [3, 7], [4, 5], [4, 6], [5, 7], [6, 7]]

    # Iterate through test batches (Batch size for test data is 1)
    logging('Testing {}...'.format(name))
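    # t1..t5 below bracket data transfer, the forward pass, and box extraction;
    # the timings are collected here but not reported by this script.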
    for batch_idx, (data, target) in enumerate(test_loader):
        t1 = time.time()
        # Pass data to GPU
        if use_cuda:
            data = data.cuda()
            target = target.cuda()
        # Wrap tensors in Variable class, set volatile=True for inference mode and to use minimal memory during inference
        data = Variable(data, volatile=True)
        t2 = time.time()
        # Forward pass
        output = model(data).data
        t3 = time.time()
        # Using confidence threshold, eliminate low-confidence predictions
        trgt = target[0].view(-1, 21)
        all_boxes = get_corresponding_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors, int(trgt[0][0]), only_objectness=0)
        t4 = time.time()
        # Iterate through all images in the batch
        for i in range(output.size(0)):
            # For each image, get all the predictions
            boxes = all_boxes[i]
            # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)
            truths = target[i].view(-1, 21)
            # Get how many objects are present in the scene
            num_gts = truths_length(truths)
            # Iterate through each ground-truth object
            for k in range(num_gts):
                box_gt = [truths[k][1], truths[k][2], truths[k][3], truths[k][4], truths[k][5], truths[k][6],
                          truths[k][7], truths[k][8], truths[k][9], truths[k][10], truths[k][11], truths[k][12],
                          truths[k][13], truths[k][14], truths[k][15], truths[k][16], truths[k][17], truths[k][18], 1.0, 1.0, truths[k][0]]
                best_conf_est = -1
                # Among the predictions that match this object's class, keep the one with the highest confidence
                for j in range(len(boxes)):
                    if (boxes[j][18] > best_conf_est) and (boxes[j][20] == int(truths[k][0])):
                        best_conf_est = boxes[j][18]
                        box_pr = boxes[j]
                        bb2d_gt = get_2d_bb(box_gt[:18], output.size(3))
                        bb2d_pr = get_2d_bb(box_pr[:18], output.size(3))
                        iou = bbox_iou(bb2d_gt, bb2d_pr)
                        match = corner_confidence9(box_gt[:18], torch.FloatTensor(boxes[j][:18]))
                # Denormalize the corner predictions
                corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]), dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * 640
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * 480
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * 640
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * 480
                corners2D_gt_corrected = fix_corner_order(corners2D_gt)  # Fix the order of corners
                # Compute [R|t] by pnp
                objpoints3D = np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32')
                K = np.array(internal_calibration, dtype='float32')
                R_gt, t_gt = pnp(objpoints3D, corners2D_gt_corrected, K)
                R_pr, t_pr = pnp(objpoints3D, corners2D_pr, K)
                # Compute pixel error
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt, internal_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr, internal_calibration)
                proj_corners_gt = np.transpose(compute_projection(corners3D, Rt_gt, internal_calibration))
                proj_corners_pr = np.transpose(compute_projection(corners3D, Rt_pr, internal_calibration))
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist = np.mean(norm)
                errs_2d.append(pixel_dist)
        t5 = time.time()

    # Compute 2D projection score
    for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:
        acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d)+eps)
        # Print test statistics
        logging(' Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))
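
# When run as a script, evaluate each of the listed OCCLUSION object classes in turn
# with a fixed confidence threshold of 0.05, reusing the supplied network config and weights.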
if __name__ == '__main__' and __package__ is None:
    import sys
    if len(sys.argv) == 3:
        conf_th = 0.05
        cfgfile = sys.argv[1]
        weightfile = sys.argv[2]
        datacfg = 'cfg/ape_occlusion.data'
        valid(datacfg, cfgfile, weightfile, conf_th)
        datacfg = 'cfg/can_occlusion.data'
        valid(datacfg, cfgfile, weightfile, conf_th)
        datacfg = 'cfg/cat_occlusion.data'
        valid(datacfg, cfgfile, weightfile, conf_th)
        datacfg = 'cfg/duck_occlusion.data'
        valid(datacfg, cfgfile, weightfile, conf_th)
        datacfg = 'cfg/glue_occlusion.data'
        valid(datacfg, cfgfile, weightfile, conf_th)
        datacfg = 'cfg/holepuncher_occlusion.data'
        valid(datacfg, cfgfile, weightfile, conf_th)
    else:
        print('Usage:')
        print(' python valid.py cfgfile weightfile')