Commit 3d940a9f authored by Francesco Gatti

Batch seems OK in yolo3_berkeley

layers to be checked:
DeformableConvRT
FlattenConcatRT
ReshapeRT
RouteRT (don't know why, but it seems to work)
parent 7c81c5a4
......@@ -163,6 +163,11 @@ N.b. The test will be slower: this is due to the INT8 calibration, which may tak
N.b. INT8 calibration requires TensorRT version greater than or equal to 6.0
### BatchSize bigger than 1
```
export TKDNN_BATCHSIZE=2
```
## mAP demo
To compute mAP, precision, recall and f1score, run the map_demo.
......
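The README hunk above only documents the new TKDNN_BATCHSIZE environment variable; the commit does not show where it is consumed. A minimal sketch of how such a variable is typically read (readBatchSize and the default of 1 are assumptions, not tkDNN's actual code):
```
#include <cstdlib>

// Read the batch size from the environment, falling back to 1 when unset.
int readBatchSize() {
    const char *e = std::getenv("TKDNN_BATCHSIZE");
    return e != nullptr ? std::atoi(e) : 1;
}
```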
......@@ -63,6 +63,7 @@ public:
const static int MAX_BUFFERS_RT = 10;
void* buffersRT[MAX_BUFFERS_RT];
dataDim_t buffersDIM[MAX_BUFFERS_RT];
int buf_input_idx, buf_output_idx;
dataDim_t input_dim, output_dim;
......@@ -81,6 +82,13 @@ public:
return 0;
}
int getBuffersN() {
if(engineRT != nullptr)
return engineRT->getNbBindings();
else
return 0;
}
/**
Do inference
*/
......
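The new getBuffersN() simply forwards the engine's binding count, and buffersDIM keeps one dataDim_t per binding. A minimal usage sketch built on the same TensorRT binding API already used in this diff (printBindings is a hypothetical helper, not part of tkDNN):
```
#include <NvInfer.h>
#include <iostream>

// Enumerate the engine bindings the way NetworkRT does when it fills buffersDIM.
void printBindings(nvinfer1::ICudaEngine *engine) {
    for (int i = 0; i < engine->getNbBindings(); i++) {
        nvinfer1::Dims d = engine->getBindingDimensions(i);   // CHW (implicit batch)
        std::cout << (engine->bindingIsInput(i) ? "input  " : "output ") << i
                  << ": " << d.d[0] << "x" << d.d[1] << "x" << d.d[2] << "\n";
    }
}
```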
......@@ -42,7 +42,7 @@ public:
virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override {
activationLEAKYForward((dnnType*)reinterpret_cast<const dnnType*>(inputs[0]),
reinterpret_cast<dnnType*>(outputs[0]), size, stream);
reinterpret_cast<dnnType*>(outputs[0]), batchSize*size, stream);
return 0;
}
......
......@@ -41,7 +41,7 @@ public:
virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override {
activationReLUCeilingForward((dnnType*)reinterpret_cast<const dnnType*>(inputs[0]),
reinterpret_cast<dnnType*>(outputs[0]), size, ceiling, stream);
reinterpret_cast<dnnType*>(outputs[0]), batchSize*size, ceiling, stream);
return 0;
}
......
......@@ -42,7 +42,7 @@ public:
virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override {
activationSIGMOIDForward((dnnType*)reinterpret_cast<const dnnType*>(inputs[0]),
reinterpret_cast<dnnType*>(outputs[0]), size, stream);
reinterpret_cast<dnnType*>(outputs[0]), batchSize*size, stream);
return 0;
}
......
......@@ -45,7 +45,7 @@ public:
//std::cout<<this->n<<" "<<this->c<<" "<<this->h<<" "<<this->w<<" "<<this->stride_H<<" "<<this->stride_W<<" "<<this->winSize<<" "<<this->padding<<std::endl;
dnnType *srcData = (dnnType*)reinterpret_cast<const dnnType*>(inputs[0]);
dnnType *dstData = reinterpret_cast<dnnType*>(outputs[0]);
MaxPoolingForward(srcData, dstData, this->n, this->c, this->h, this->w, this->stride_H, this->stride_W, this->winSize, this->padding);
MaxPoolingForward(srcData, dstData, batchSize, this->c, this->h, this->w, this->stride_H, this->stride_W, this->winSize, this->padding);
return 0;
}
......
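The activation hunks above (Leaky, ReLUCeiling, Sigmoid) only change the element count from size to batchSize*size, and the max-pooling hunk passes batchSize as the image count: with TensorRT's implicit-batch IPlugin::enqueue, each input/output pointer holds batchSize items laid out back-to-back, so an elementwise kernel just needs the total count. A minimal sketch of the pattern (not tkDNN's actual kernel; the 0.1 negative slope is an assumption):
```
// Elementwise leaky ReLU over all batch items at once.
__global__ void leaky_sketch(const float *x, float *y, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        y[i] = x[i] > 0.f ? x[i] : 0.1f * x[i];
}

// launch: one thread per element of every batch item
// leaky_sketch<<<(batchSize*size + 255)/256, 256, 0, stream>>>(src, dst, batchSize*size);
```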
......@@ -50,16 +50,16 @@ public:
for (int b = 0; b < batchSize; ++b){
for(int n = 0; n < num; ++n){
int index = entry_index(b, n*w*h, 0, batchSize);
int index = entry_index(b, n*w*h, 0);
activationLOGISTICForward(srcData + index, dstData + index, 2*w*h, stream);
index = entry_index(b, n*w*h, coords, batchSize);
index = entry_index(b, n*w*h, coords);
activationLOGISTICForward(srcData + index, dstData + index, w*h, stream);
}
}
//softmax start
int index = entry_index(0, 0, coords + 1, batchSize);
int index = entry_index(0, 0, coords + 1);
softmaxForward( srcData + index, classes, batchSize*num,
(batchSize*c*h*w)/num,
w*h, 1, w*h, 1, dstData + index, stream);
......@@ -85,10 +85,10 @@ public:
int c, h, w;
int classes, coords, num;
int entry_index(int batch, int location, int entry, int batchSize) {
int entry_index(int batch, int location, int entry) {
int n = location / (w*h);
int loc = location % (w*h);
return batch*c*h*w*batchSize + n*w*h*(coords+classes+1) + entry*w*h + loc;
return batch*c*h*w + n*w*h*(coords+classes+1) + entry*w*h + loc;
}
};
......@@ -62,10 +62,10 @@ public:
for (int b = 0; b < batchSize; ++b){
for(int n = 0; n < n_masks; ++n){
int index = entry_index(b, n*w*h, 0, batchSize);
int index = entry_index(b, n*w*h, 0);
activationLOGISTICForward(srcData + index, dstData + index, 2*w*h, stream);
index = entry_index(b, n*w*h, 4, batchSize);
index = entry_index(b, n*w*h, 4);
activationLOGISTICForward(srcData + index, dstData + index, (1+classes)*w*h, stream);
}
}
......@@ -109,10 +109,10 @@ public:
dnnType *mask;
dnnType *bias;
int entry_index(int batch, int location, int entry, int batchSize) {
int entry_index(int batch, int location, int entry) {
int n = location / (w*h);
int loc = location % (w*h);
return batch*c*h*w*batchSize + n*w*h*(4+classes+1) + entry*w*h + loc;
return batch*c*h*w + n*w*h*(4+classes+1) + entry*w*h + loc;
}
};
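Both entry_index() fixes above (Region and Yolo plugins) drop the spurious batchSize factor: each batch item occupies c*h*w contiguous elements, so item b simply starts at b*c*h*w. A worked example with hypothetical YOLOv3-like dimensions:
```
// Hypothetical dims: 13x13 grid (w*h = 169), 80 classes, n_masks = 3
// -> c = 3*(4+80+1) = 255, so one batch item spans c*h*w = 255*169 = 43095 elements.
// With batchSize = 2 the binding buffer holds 2*43095 = 86190 elements.
// old: entry_index(1, 0, 0, 2) = 1*c*h*w*batchSize = 86190  -> one past the end of the buffer
// new: entry_index(1, 0, 0)    = 1*c*h*w           = 43095  -> start of the second batch item
```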
......@@ -182,6 +182,8 @@ NetworkRT::NetworkRT(Network *net, const char *name) {
// create GPU buffers and a stream
for(int i=0; i<engineRT->getNbBindings(); i++) {
Dims dim = engineRT->getBindingDimensions(i);
buffersDIM[i] = dataDim_t(1, dim.d[0], dim.d[1], dim.d[2]);
std::cout<<"RtBuffer "<<i<<" dim: "; buffersDIM[i].print();
checkCuda(cudaMalloc(&buffersRT[i], engineRT->getMaxBatchSize()*dim.d[0]*dim.d[1]*dim.d[2]*sizeof(dnnType)));
}
checkCuda(cudaMalloc(&output, engineRT->getMaxBatchSize()*output_dim.tot()*sizeof(dnnType)));
......@@ -326,7 +328,7 @@ ILayer* NetworkRT::convert_layer(ITensor *input, Conv2d *l) {
lRT = (ILayer*) lRTconv;
Dims d = lRTconv->getOutput(0)->getDimensions();
std::cout<<"DECONV: "<<d.d[0]<<" "<<d.d[1]<<" "<<d.d[2]<<" "<<d.d[3]<<"\n";
//std::cout<<"DECONV: "<<d.d[0]<<" "<<d.d[1]<<" "<<d.d[2]<<" "<<d.d[3]<<"\n";
}
checkNULL(lRT);
......@@ -533,7 +535,7 @@ ILayer* NetworkRT::convert_layer(ITensor *input, Upsample *l) {
}
ILayer* NetworkRT::convert_layer(ITensor *input, DeformConv2d *l) {
std::cout<<"convert DEFORMABLE\n";
//std::cout<<"convert DEFORMABLE\n";
ILayer *preconv = convert_layer(input, l->preconv);
checkNULL(preconv);
......@@ -541,7 +543,7 @@ ILayer* NetworkRT::convert_layer(ITensor *input, DeformConv2d *l) {
inputs[0] = input;
inputs[1] = preconv->getOutput(0);
std::cout<<"New plugin DEFORMABLE\n";
//std::cout<<"New plugin DEFORMABLE\n";
IPlugin *plugin = new DeformableConvRT(l->chunk_dim, l->kernelH, l->kernelW, l->strideH, l->strideW, l->paddingH, l->paddingW,
l->deformableGroup, l->input_dim.n, l->input_dim.c, l->input_dim.h, l->input_dim.w,
l->output_dim.n, l->output_dim.c, l->output_dim.h, l->output_dim.w, l);
......@@ -568,7 +570,7 @@ ILayer* NetworkRT::convert_layer(ITensor *input, DeformConv2d *l) {
Weights power{dtRT, power_b, l->outputs};
Weights shift{dtRT, mean_b, l->outputs};
Weights scale{dtRT, variance_b, l->outputs};
std::cout<<lRT->getNbOutputs()<<std::endl;
//std::cout<<lRT->getNbOutputs()<<std::endl;
IScaleLayer *lRT2 = networkRT->addScale(*lRT->getOutput(0), ScaleMode::kCHANNEL,
shift, scale, power);
......@@ -628,7 +630,7 @@ IPlugin* PluginFactory::createPlugin(const char* layerName, const void* serialDa
const char * buf = reinterpret_cast<const char*>(serialData);
std::string name(layerName);
std::cout<<name<<std::endl;
//std::cout<<name<<std::endl;
if(name.find("ActivationLeaky") == 0) {
ActivationLeakyRT *a = new ActivationLeakyRT();
......
......@@ -27,11 +27,16 @@ int main(int argc, char *argv[]) {
dnnType *input_d;
checkCuda( cudaMalloc(&input_d, idim.tot()*sizeof(dnnType)));
int ret_tensorrt = 0;
std::cout<<"Testing with batchsize: "<<BATCH_SIZE<<"\n";
printCenteredTitle(" TENSORRT inference ", '=', 30);
for(int i=0; i<10; i++) {
for(int j=0; j<idim.tot(); j++) {
input[j] = ((float) rand() / (RAND_MAX));
// generate input
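// the same value is written to every batch slot, so the per-batch outputs should match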
for(int j=0; j<netRT.input_dim.tot(); j++) {
dnnType val = ((float) rand() / (RAND_MAX));
for(int b=0; b<BATCH_SIZE; b++)
input[netRT.input_dim.tot()*b + j] = val;
}
checkCuda(cudaMemcpy(input_d, input, idim.tot()*sizeof(dnnType), cudaMemcpyHostToDevice));
......@@ -39,7 +44,17 @@ int main(int argc, char *argv[]) {
TIMER_START
netRT.infer(dim, input_d);
TIMER_STOP
// control output
for(int o=1; o<netRT.getBuffersN(); o++) {
for(int b=1; b<BATCH_SIZE; b++) {
dnnType *out_d = (dnnType*) netRT.buffersRT[o];
dnnType *out0_d = out_d;
dnnType *outI_d = out_d + netRT.buffersDIM[o].tot()*b;
//ret_tensorrt |= checkResult(netRT.buffersDIM[o].tot(), outI_d, out0_d);
}
}
}
return 0;
return ret_tensorrt;
}
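The per-batch output check in the new test loop is still commented out. Since every batch slot receives the same input, item b of each output binding should match item 0 up to numerical noise; a minimal sketch of such a check, assuming each buffersRT[o] holds BATCH_SIZE copies of buffersDIM[o].tot() elements laid out contiguously (checkBatchCopies is a hypothetical helper, not tkDNN's checkResult):
```
#include <cmath>
#include <vector>
#include <cuda_runtime.h>

// Compare batch item b of a device buffer against batch item 0.
// Returns the number of elements that differ by more than eps.
int checkBatchCopies(const float *out_d, int itemSize, int b, float eps = 1e-5f) {
    std::vector<float> ref(itemSize), cur(itemSize);
    cudaMemcpy(ref.data(), out_d,                itemSize * sizeof(float), cudaMemcpyDeviceToHost);
    cudaMemcpy(cur.data(), out_d + itemSize * b, itemSize * sizeof(float), cudaMemcpyDeviceToHost);
    int errors = 0;
    for (int i = 0; i < itemSize; i++)
        if (std::fabs(ref[i] - cur[i]) > eps)
            errors++;
    return errors;
}
```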