Commit 04602f39 authored by Francesco Gatti
Browse files

Yolo4-tiny batched

fix #59
parent fe2e4eae
......@@ -317,6 +317,8 @@ This demo also creates a json file named ```net_name_COCO_res.json``` containing
| resnet101_cnet | Centernet (Resnet101 backend)<sup>4</sup> | [COCO 2017](http://cocodataset.org/) | 80 | 512x512 | [weights](https://cloud.hipert.unimore.it/s/5BTjHMWBcJk8g3i/download) |
| csresnext50-panet-spp | Cross Stage Partial Network <sup>7</sup> | [COCO 2014](http://cocodataset.org/) | 80 | 416x416 | [weights](https://cloud.hipert.unimore.it/s/Kcs4xBozwY4wFx8/download) |
| yolo4 | Yolov4 <sup>8</sup> | [COCO 2017](http://cocodataset.org/) | 80 | 416x416 | [weights](https://cloud.hipert.unimore.it/s/d97CFzYqCPCp5Hg/download) |
| yolo4_berkeley | Yolov4 <sup>8</sup> | [BDD100K](https://bair.berkeley.edu/blog/2018/05/30/bdd/) | 10 | 540x320 | [weights](https://cloud.hipert.unimore.it/s/nkWFa5fgb4NTdnB/download) |
| yolo4tiny | Yolov4 tiny | [COCO 2017](http://cocodataset.org/) | 80 | 416x416 | [weights](https://cloud.hipert.unimore.it/s/iRnc4pSqmx78gJs/download) |
## References
......
......@@ -52,16 +52,18 @@ public:
}
virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override {
dnnType *dstData = reinterpret_cast<dnnType*>(outputs[0]);
int offset = 0;
for(int i=0; i<in; i++) {
dnnType *input = (dnnType*)reinterpret_cast<const dnnType*>(inputs[i]);
int in_dim = c_in[i]*h*w;
int part_in_dim = in_dim / this->groups;
checkCuda( cudaMemcpyAsync(dstData + offset, input + this->group_id*part_in_dim, part_in_dim*sizeof(dnnType), cudaMemcpyDeviceToDevice, stream) );
offset += part_in_dim;
for(int b=0; b<batchSize; b++) {
int offset = 0;
for(int i=0; i<in; i++) {
dnnType *input = (dnnType*)reinterpret_cast<const dnnType*>(inputs[i]);
int in_dim = c_in[i]*h*w;
int part_in_dim = in_dim / this->groups;
checkCuda( cudaMemcpyAsync(dstData + b*c*w*h + offset, input + b*c*w*h*groups + this->group_id*part_in_dim, part_in_dim*sizeof(dnnType), cudaMemcpyDeviceToDevice, stream) );
offset += part_in_dim;
}
}
return 0;
......
......@@ -74,6 +74,7 @@ do
test_net yolo4
test_net yolo4_berkeley
test_net yolo4tiny
test_net yolo3
test_net yolo3_berkeley
test_net yolo3_coco4
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment