Commit adac8576 authored by Micaela Verucchi

Add support for Scaled-YOLOv4, update Yolov4x-mish (tested)



Signed-off-by: Micaela Verucchi <micaelaverucchi@gmail.com>
parent b8855b95
......@@ -353,7 +353,8 @@ This demo also creates a json file named ```net_name_COCO_res.json``` containing
| yolo4 | Yolov4 <sup>8</sup> | [COCO 2017](http://cocodataset.org/) | 80 | 416x416 | [weights](https://cloud.hipert.unimore.it/s/d97CFzYqCPCp5Hg/download) |
| yolo4_berkeley | Yolov4 <sup>8</sup> | [BDD100K ](https://bair.berkeley.edu/blog/2018/05/30/bdd/) | 10 | 540x320 | [weights](https://cloud.hipert.unimore.it/s/nkWFa5fgb4NTdnB/download) |
| yolo4tiny | Yolov4 tiny <sup>9</sup> | [COCO 2017](http://cocodataset.org/) | 80 | 416x416 | [weights](https://cloud.hipert.unimore.it/s/iRnc4pSqmx78gJs/download) |
-| yolo4x | Yolov4x-mish <sup>9</sup> | [COCO 2017](http://cocodataset.org/) | 80 | 672x672 | [weights](https://cloud.hipert.unimore.it/s/BLPpiAigZJLorQD/download) |
+| yolo4x | Yolov4x-mish <sup>9</sup> | [COCO 2017](http://cocodataset.org/) | 80 | 640x640 | [weights](https://cloud.hipert.unimore.it/s/5MFjtNtgbDGdJEo/download) |
+| yolo4-csp | Scaled-YOLOv4 <sup>10</sup> | [COCO 2017](http://cocodataset.org/) | 80 | 512x512 | [weights](https://cloud.hipert.unimore.it/s/AfzHE4BfTeEm2gH/download) |
## References
......@@ -367,3 +368,4 @@ This demo also creates a json file named ```net_name_COCO_res.json``` containing
7. Wang, Chien-Yao, et al. "CSPNet: A New Backbone that can Enhance Learning Capability of CNN." arXiv preprint arXiv:1911.11929 (2019).
8. Bochkovskiy, Alexey, Chien-Yao Wang, and Hong-Yuan Mark Liao. "YOLOv4: Optimal Speed and Accuracy of Object Detection." arXiv preprint arXiv:2004.10934 (2020).
9. Bochkovskiy, Alexey, "Yolo v4, v3 and v2 for Windows and Linux" (https://github.com/AlexeyAB/darknet)
10. Wang, Chien-Yao, Alexey Bochkovskiy, and Hong-Yuan Mark Liao. "Scaled-YOLOv4: Scaling Cross Stage Partial Network." arXiv preprint arXiv:2011.08036 (2020).
......@@ -19,6 +19,7 @@ enum layerType_t {
LAYER_ACTIVATION_CRELU,
LAYER_ACTIVATION_LEAKY,
LAYER_ACTIVATION_MISH,
LAYER_ACTIVATION_LOGISTIC,
LAYER_FLATTEN,
LAYER_RESHAPE,
LAYER_MULADD,
......@@ -68,6 +69,7 @@ public:
case LAYER_ACTIVATION_CRELU: return "ActivationCReLU";
case LAYER_ACTIVATION_LEAKY: return "ActivationLeaky";
case LAYER_ACTIVATION_MISH: return "ActivationMish";
case LAYER_ACTIVATION_LOGISTIC: return "ActivationLogistic";
case LAYER_FLATTEN: return "Flatten";
case LAYER_RESHAPE: return "Reshape";
case LAYER_MULADD: return "MulAdd";
......@@ -212,7 +214,8 @@ public:
typedef enum {
ACTIVATION_ELU = 100,
ACTIVATION_LEAKY = 101,
-        ACTIVATION_MISH = 102
+        ACTIVATION_MISH = 102,
+        ACTIVATION_LOGISTIC = 103
} tkdnnActivationMode_t;
/**
......@@ -233,6 +236,8 @@ public:
return LAYER_ACTIVATION_LEAKY;
else if (act_mode == ACTIVATION_MISH)
return LAYER_ACTIVATION_MISH;
else if (act_mode == ACTIVATION_LOGISTIC)
return LAYER_ACTIVATION_LOGISTIC;
else
return LAYER_ACTIVATION;
};
......
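For orientation, the new mode plugs into the same construction path as leaky and mish: the darknet parser (further below in this diff) maps `activation=logistic` to `ACTIVATION_LOGISTIC` and then builds a standalone activation layer. A minimal usage sketch, assuming `net` is an already-constructed `tk::dnn::Network*`:

```cpp
// Hedged usage sketch: append an elementwise logistic (sigmoid) activation,
// mirroring the `new tk::dnn::Activation(net, act)` call in darknetParser.
// `net` is assumed to be a valid, already-built tk::dnn::Network*.
tk::dnn::Activation *logistic = new tk::dnn::Activation(net, tk::dnn::ACTIVATION_LOGISTIC);
```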
......@@ -24,6 +24,7 @@ template<typename T> T readBUF(const char*& buffer)
using namespace nvinfer1;
#include "pluginsRT/ActivationLeakyRT.h"
#include "pluginsRT/ActivationLogisticRT.h"
#include "pluginsRT/ActivationReLUCeilingRT.h"
#include "pluginsRT/ActivationMishRT.h"
#include "pluginsRT/ReorgRT.h"
......
#include <cassert>
#include "../kernels.h"
class ActivationLogisticRT : public IPlugin {
public:
ActivationLogisticRT() {
}
~ActivationLogisticRT(){
}
int getNbOutputs() const override {
return 1;
}
Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override {
return inputs[0];
}
void configure(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, int maxBatchSize) override {
size = 1;
for(int i=0; i<outputDims[0].nbDims; i++)
size *= outputDims[0].d[i];
}
int initialize() override {
return 0;
}
virtual void terminate() override {
}
virtual size_t getWorkspaceSize(int maxBatchSize) const override {
return 0;
}
virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override {
activationLOGISTICForward((dnnType*)reinterpret_cast<const dnnType*>(inputs[0]),
reinterpret_cast<dnnType*>(outputs[0]), batchSize*size, stream);
return 0;
}
virtual size_t getSerializationSize() override {
return 1*sizeof(int);
}
virtual void serialize(void* buffer) override {
char *buf = reinterpret_cast<char*>(buffer);
tk::dnn::writeBUF(buf, size);
}
int size;
};
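The plugin only tracks the flattened tensor size; the math is delegated to `activationLOGISTICForward` from `../kernels.h`, which is not part of this diff. An elementwise sigmoid along the following lines is what the call sites imply; this is a hedged sketch (the `typedef`, kernel name, and 256-thread launch geometry are assumptions, not the repository's actual kernel):

```cpp
typedef float dnnType;  // tkDNN's dnnType is assumed to be float here

// Hedged sketch of an elementwise logistic kernel matching the call
// activationLOGISTICForward(src, dst, size, stream) used by the plugin.
__global__ void activationLogisticKernelSketch(dnnType *src, dnnType *dst, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        dst[i] = 1.0f / (1.0f + expf(-src[i]));  // sigmoid(src[i])
}

void activationLOGISTICForwardSketch(dnnType *srcData, dnnType *dstData,
                                     int size, cudaStream_t stream = 0) {
    int blocks = (size + 255) / 256;
    activationLogisticKernelSketch<<<blocks, 256, 0, stream>>>(srcData, dstData, size);
}
```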
......@@ -67,15 +67,17 @@ public:
for (int b = 0; b < batchSize; ++b){
for(int n = 0; n < n_masks; ++n){
int index = entry_index(b, n*w*h, 0);
-                if (new_coords == 1)
-                    activationLOGISTICForward(srcData + index, dstData + index, 4*w*h, stream); //x,y,w,h
-                else
+                if (new_coords == 1){
+                    if (this->scaleXY != 1) scalAdd(dstData + index, 2 * w*h, this->scaleXY, -0.5*(this->scaleXY - 1), 1);
+                }
+                else{
                     activationLOGISTICForward(srcData + index, dstData + index, 2*w*h, stream); //x,y
-                if (this->scaleXY != 1) scalAdd(dstData + index, 2 * w*h, this->scaleXY, -0.5*(this->scaleXY - 1), 1);
+                    if (this->scaleXY != 1) scalAdd(dstData + index, 2 * w*h, this->scaleXY, -0.5*(this->scaleXY - 1), 1);
-                index = entry_index(b, n*w*h, 4);
-                activationLOGISTICForward(srcData + index, dstData + index, (1+classes)*w*h, stream);
+                    index = entry_index(b, n*w*h, 4);
+                    activationLOGISTICForward(srcData + index, dstData + index, (1+classes)*w*h, stream);
+                }
}
}
......
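Both this plugin and `Yolo::infer` further below call `scalAdd(dstData + index, 2*w*h, scaleXY, -0.5*(scaleXY - 1), 1)` on the x,y channels. With scale_x_y = 2 this maps a sigmoid output s in (0,1) to 2s - 0.5 in (-0.5, 1.5), so predicted centers can reach the borders of the grid cell. A hedged sketch of the affine map that call implies (parameter names are assumptions):

```cpp
// Hedged sketch of scalAdd as an in-place affine map over n elements with
// stride inc: data[i*inc] = alpha * data[i*inc] + beta.
// For scale_x_y = 2: alpha = 2, beta = -0.5, i.e. s -> 2s - 0.5.
__global__ void scalAddKernelSketch(float *data, int n, float alpha, float beta, int inc) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        data[i * inc] = alpha * data[i * inc] + beta;
}
```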
......@@ -69,10 +69,11 @@ do
echo -e "${ORANGE}Batch $TKDNN_BATCHSIZE ${NC}"
test_net mnist
-./test_imuodom &>> $out_file
-print_output $? imuodom
+# ./test_imuodom &>> $out_file
+# print_output $? imuodom
test_net yolo4
test_net yolo4-csp
test_net yolo4x
test_net yolo4_berkeley
test_net yolo4tiny
......
......@@ -52,6 +52,10 @@ dnnType* Activation::infer(dataDim_t &dim, dnnType* srcData) {
else if(act_mode == ACTIVATION_MISH) {
activationMishForward(srcData, dstData, dim.tot());
}
else if(act_mode == ACTIVATION_LOGISTIC) {
activationLOGISTICForward(srcData, dstData, dim.tot());
} else {
dnnType alpha = dnnType(1);
dnnType beta = dnnType(0);
......
......@@ -187,6 +187,7 @@ namespace tk { namespace dnn {
if(f.activation == "relu") act = tkdnnActivationMode_t(CUDNN_ACTIVATION_RELU);
else if(f.activation == "leaky") act = tk::dnn::ACTIVATION_LEAKY;
else if(f.activation == "mish") act = tk::dnn::ACTIVATION_MISH;
else if(f.activation == "logistic") act = tk::dnn::ACTIVATION_LOGISTIC;
else { FatalError("activation not supported: " + f.activation); }
netLayers[netLayers.size()-1] = new tk::dnn::Activation(net, act);
};
......
......@@ -226,7 +226,7 @@ ILayer* NetworkRT::convert_layer(ITensor *input, Layer *l) {
return convert_layer(input, (Conv2d*) l);
if(type == LAYER_POOLING)
return convert_layer(input, (Pooling*) l);
-    if(type == LAYER_ACTIVATION || type == LAYER_ACTIVATION_CRELU || type == LAYER_ACTIVATION_LEAKY || type == LAYER_ACTIVATION_MISH)
+    if(type == LAYER_ACTIVATION || type == LAYER_ACTIVATION_CRELU || type == LAYER_ACTIVATION_LEAKY || type == LAYER_ACTIVATION_MISH || type == LAYER_ACTIVATION_LOGISTIC)
return convert_layer(input, (Activation*) l);
if(type == LAYER_SOFTMAX)
return convert_layer(input, (Softmax*) l);
......@@ -421,6 +421,12 @@ ILayer* NetworkRT::convert_layer(ITensor *input, Activation *l) {
checkNULL(lRT);
return lRT;
}
else if(l->act_mode == ACTIVATION_LOGISTIC) {
IPlugin *plugin = new ActivationLogisticRT();
IPluginLayer *lRT = networkRT->addPlugin(&input, 1, *plugin);
checkNULL(lRT);
return lRT;
}
else {
FatalError("this Activation mode is not yet implemented");
return NULL;
......@@ -653,6 +659,11 @@ IPlugin* PluginFactory::createPlugin(const char* layerName, const void* serialDa
a->size = readBUF<int>(buf);
return a;
}
if(name.find("ActivationLogistic") == 0) {
ActivationLogisticRT *a = new ActivationLogisticRT();
a->size = readBUF<int>(buf);
return a;
}
if(name.find("ActivationCReLU") == 0) {
ActivationReLUCeiling *a = new ActivationReLUCeiling(readBUF<float>(buf));
a->size = readBUF<int>(buf);
......
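`serialize()` and `createPlugin()` above round-trip the plugin's single `size` field through `tk::dnn::writeBUF` and `readBUF`. Given the `readBUF` signature visible in the hunk header earlier, the symmetric helpers are presumably along these lines (a sketch, not the repository's exact code):

```cpp
#include <cstring>

// Sketch of the serialization helpers assumed by serialize()/createPlugin():
// copy a POD value into/out of a byte stream and advance the cursor.
// Field order must match on both sides for the round trip to be safe.
template<typename T> void writeBUF(char*& buffer, const T& val) {
    std::memcpy(buffer, &val, sizeof(T));
    buffer += sizeof(T);
}

template<typename T> T readBUF(const char*& buffer) {
    T val;
    std::memcpy(&val, buffer, sizeof(T));
    buffer += sizeof(T);
    return val;
}
```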
......@@ -72,8 +72,8 @@ Yolo::box get_yolo_box(float *x, float *biases, int n, int index, int i, int j,
b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h;
}
else{
-        b.x = (i + x[index + 0 * stride] * 2 - 0.5) / lw;
-        b.y = (j + x[index + 1 * stride] * 2 - 0.5) / lh;
+        b.x = (i + x[index + 0 * stride] ) / lw;
+        b.y = (j + x[index + 1 * stride] ) / lh;
b.w = x[index + 2 * stride] * x[index + 2 * stride] * 4 * biases[2 * n] / w;
b.h = x[index + 3 * stride] * x[index + 3 * stride] * 4 * biases[2 * n + 1] / h;
}
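In the `new_coords` (Scaled-YOLOv4) branch the outputs have already been squashed by the preceding convolution's `activation=logistic` (presumably why this commit adds logistic support to the parser) and remapped by scale_x_y, so the decode is purely polynomial: centers are read off directly, and width/height use a squared term instead of `exp`, capping them at 4x the anchor. A standalone sketch of the branch above (names are illustrative):

```cpp
// Standalone sketch of the new_coords box decode. tx..th are the already
// logistic- and scale_x_y-adjusted network outputs; (i, j) is the grid
// cell, lw x lh the grid size, w x h the network input size.
struct BoxSketch { float x, y, w, h; };

BoxSketch decodeNewCoords(float tx, float ty, float tw, float th,
                          int i, int j, int lw, int lh, int w, int h,
                          float anchorW, float anchorH) {
    BoxSketch b;
    b.x = (i + tx) / lw;                  // cell offset + remapped sigmoid
    b.y = (j + ty) / lh;
    b.w = tw * tw * 4.f * anchorW / w;    // squared sigmoid * 4: at most 4x anchor
    b.h = th * th * 4.f * anchorH / h;
    return b;
}
```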
......@@ -87,15 +87,18 @@ dnnType* Yolo::infer(dataDim_t &dim, dnnType* srcData) {
for (int b = 0; b < dim.n; ++b){
for(int n = 0; n < n_masks; ++n){
int index = entry_index(b, n*dim.w*dim.h, 0, classes, input_dim, output_dim);
-            if (new_coords == 1)
-                activationLOGISTICForward(srcData + index, dstData + index, 4*dim.w*dim.h);
-            else
+            std::cout<<"new_coords "<<new_coords<<std::endl;
+            if (new_coords == 1){
+                if (this->scaleXY != 1) scalAdd(dstData + index, 2 * dim.w*dim.h, this->scaleXY, -0.5*(this->scaleXY - 1), 1);
+            }
+            else{
                 activationLOGISTICForward(srcData + index, dstData + index, 2*dim.w*dim.h);
-            if (this->scaleXY != 1) scalAdd(dstData + index, 2 * dim.w*dim.h, this->scaleXY, -0.5*(this->scaleXY - 1), 1);
-            index = entry_index(b, n*dim.w*dim.h, 4, classes, input_dim, output_dim);
-            activationLOGISTICForward(srcData + index, dstData + index, (1+classes)*dim.w*dim.h);
+                if (this->scaleXY != 1) scalAdd(dstData + index, 2 * dim.w*dim.h, this->scaleXY, -0.5*(this->scaleXY - 1), 1);
+                index = entry_index(b, n*dim.w*dim.h, 4, classes, input_dim, output_dim);
+                activationLOGISTICForward(srcData + index, dstData + index, (1+classes)*dim.w*dim.h);
+            }
}
}
......
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=64
subdivisions=8
width=512
height=512
channels=3
momentum=0.949
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500500
policy=steps
steps=400000,450000
scales=.1,.1
mosaic=1
letter_box=1
ema_alpha=0.9998
#optimized_memory=1
#23:104x104 54:52x52 85:26x26 104:13x13 for 416
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=mish
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=mish
#[convolutional]
#batch_normalize=1
#filters=64
#size=1
#stride=1
#pad=1
#activation=mish
#[route]
#layers = -2
#[convolutional]
#batch_normalize=1
#filters=64
#size=1
#stride=1
#pad=1
#activation=mish
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
#[convolutional]
#batch_normalize=1
#filters=64
#size=1
#stride=1
#pad=1
#activation=mish
#[route]
#layers = -1,-7
#[convolutional]
#batch_normalize=1
#filters=64
#size=1
#stride=1
#pad=1
#activation=mish
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[route]
layers = -2
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=mish
[route]
layers = -1,-10
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[route]
layers = -2
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=mish
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=mish
[shortcut]
from=-3
activation=linear
[convolutional]