Commit 40456592 authored by Micaela Verucchi

Adapt detection classes to use batches, adapt demos, update README



Signed-off-by: Micaela Verucchi <micaelaverucchi@gmail.com>
parent 2e3cb52c
......@@ -123,13 +123,16 @@ rm yolo3_fp32.rt # be sure to delete(or move) old tensorRT files
```
In general the demo program takes 6 parameters:
```
./demo <network-rt-file> <path-to-video> <kind-of-network> <number-of-classes>
./demo <network-rt-file> <path-to-video> <kind-of-network> <number-of-classes> <n-batches> <show-flag>
```
where
* ```<network-rt-file>``` is the rt file generated by a test
* ```<path-to-video>``` is the path to a video file or a camera input
* ```<kind-of-network>``` is the type of network. Three types are currently supported: ```y``` (YOLO family), ```c``` (CenterNet family) and ```m``` (MobileNet-SSD family)
* ```<number-of-classes>``` is the number of classes the network is trained on
* ```<n-batches>``` is the number of batches to use in inference (N.B. you should first export TKDNN_BATCHSIZE to the required number of batches and create the rt file for the network again; see the example below).
* ```<show-flag>``` if set to 0 the demo does not show the visualization but saves the video into result.mp4 (only when n-batches == 1)
N.B. FP32 inference is used by default.
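For example, a run with 4 batches could look like this (a minimal sketch: the batch size is illustrative and the test/rt file names assume the yolo3 example above):
```
export TKDNN_BATCHSIZE=4
rm yolo3_fp32.rt     # the old rt file was built for a single batch: delete (or move) it
./test_yolo3         # re-create the rt file with the new batch size
./demo yolo3_fp32.rt ../demo/yolo_test.mp4 y 80 4 0
```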
![demo](https://user-images.githubusercontent.com/11562617/72547657-540e7800-388d-11ea-83c6-49dfea2a0607.gif)
......@@ -218,6 +221,8 @@ cd build
./map_demo dla34_cnet_FP32.rt c ../demo/COCO_val2017/all_labels.txt ../demo/config.yaml
```
This demo also creates a json file named ```net_name_COCO_res.json``` containing all the detections computed. The detections are in COCO format, the correct format to submit the results to the [CodaLab COCO detection challenge](https://competitions.codalab.org/competitions/20794#participate).
## Existing tests and supported networks
| Test Name | Network | Dataset | N Classes | Input size | Weights |
......
......@@ -34,9 +34,15 @@ int main(int argc, char *argv[]) {
int n_classes = 80;
if(argc > 4)
n_classes = atoi(argv[4]);
bool show = true;
int n_batch = 1;
if(argc > 5)
show = atoi(argv[5]);
n_batch = atoi(argv[5]);
bool show = true;
if(argc > 6)
show = atoi(argv[6]);
if(n_batch < 1 || n_batch > 64)
FatalError("Batch dim not supported");
if(!show)
SAVE_RESULT = true;
......@@ -63,7 +69,7 @@ int main(int argc, char *argv[]) {
FatalError("Network type not allowed (3rd parameter)\n");
}
detNN->init(net, n_classes);
detNN->init(net, n_classes, n_batch);
gRun = true;
......@@ -81,30 +87,40 @@ int main(int argc, char *argv[]) {
}
cv::Mat frame;
cv::Mat dnn_input;
if(show)
cv::namedWindow("detection", cv::WINDOW_NORMAL);
std::vector<tk::dnn::box> detected_bbox;
std::vector<cv::Mat> batch_frame;
std::vector<cv::Mat> batch_dnn_input;
while(gRun) {
cap >> frame;
if(!frame.data) {
break;
}
// this will be resized to the net format
dnn_input = frame.clone();
batch_dnn_input.clear();
batch_frame.clear();
for(int bi=0; bi< n_batch; ++bi){
cap >> frame;
if(!frame.data)
break;
batch_frame.push_back(frame);
// this will be resized to the net format
batch_dnn_input.push_back(frame.clone());
}
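// if the video ended mid-batch, discard the partially filled batch and stop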
if(!frame.data)
break;
//inference
detNN->update(dnn_input);
frame = detNN->draw(frame);
detNN->update(batch_dnn_input);
detNN->draw(batch_frame);
if(show){
cv::imshow("detection", frame);
cv::waitKey(1);
for(int bi=0; bi< n_batch; ++bi){
cv::imshow("detection", batch_frame[bi]);
cv::waitKey(1);
}
}
if(SAVE_RESULT)
if(n_batch == 1 && SAVE_RESULT)
resultVideo << frame;
}
......@@ -112,10 +128,10 @@ int main(int argc, char *argv[]) {
double mean = 0;
std::cout<<COL_GREENB<<"\n\nTime stats:\n";
std::cout<<"Min: "<<*std::min_element(detNN->stats.begin(), detNN->stats.end())<<" ms\n";
std::cout<<"Max: "<<*std::max_element(detNN->stats.begin(), detNN->stats.end())<<" ms\n";
std::cout<<"Min: "<<*std::min_element(detNN->stats.begin(), detNN->stats.end())/n_batch<<" ms\n";
std::cout<<"Max: "<<*std::max_element(detNN->stats.begin(), detNN->stats.end())/n_batch<<" ms\n";
for(int i=0; i<detNN->stats.size(); i++) mean += detNN->stats[i]; mean /= detNN->stats.size();
std::cout<<"Avg: "<<mean<<" ms\n"<<COL_END;
std::cout<<"Avg: "<<mean/n_batch<<" ms\n"<<COL_END;
return 0;
......
......@@ -132,19 +132,20 @@ int main(int argc, char *argv[])
FatalError("Wrong image file path.");
cv::Mat frame = cv::imread(f.iFilename.c_str(), cv::IMREAD_COLOR);
std::vector<cv::Mat> batch_frames;
batch_frames.push_back(frame);
int height = frame.rows;
int width = frame.cols;
cv::Mat dnn_input;
if(!frame.data)
break;
dnn_input = frame.clone();
std::vector<cv::Mat> batch_dnn_input;
batch_dnn_input.push_back(frame.clone());
//inference
detected_bbox.clear();
detNN->update(dnn_input, write_res_on_file, &times, write_coco_json);
frame = detNN->draw(frame);
detNN->update(batch_dnn_input, write_res_on_file, &times, write_coco_json);
detNN->draw(batch_frames);
detected_bbox = detNN->detected;
if(write_coco_json)
......@@ -171,7 +172,7 @@ int main(int argc, char *argv[])
myfile << d.cl << " "<< d.prob << " "<< d.x << " "<< d.y << " "<< d.w << " "<< d.h <<"\n";
if(show)// draw rectangle for detection
cv::rectangle(frame, cv::Point(d.x, d.y), cv::Point(d.x + d.w, d.y + d.h), cv::Scalar(0, 0, 255), 2);
cv::rectangle(batch_frames[0], cv::Point(d.x, d.y), cv::Point(d.x + d.w, d.y + d.h), cv::Scalar(0, 0, 255), 2);
}
if(write_dets)
......@@ -190,14 +191,14 @@ int main(int argc, char *argv[])
f.gt.push_back(b);
if(show)// draw rectangle for groundtruth
cv::rectangle(frame, cv::Point((b.x-b.w/2)*width, (b.y-b.h/2)*height), cv::Point((b.x+b.w/2)*width,(b.y+b.h/2)*height), cv::Scalar(0, 255, 0), 2);
cv::rectangle(batch_frames[0], cv::Point((b.x-b.w/2)*width, (b.y-b.h/2)*height), cv::Point((b.x+b.w/2)*width,(b.y+b.h/2)*height), cv::Scalar(0, 255, 0), 2);
}
}
images.push_back(f);
if(show){
cv::imshow("detection", frame);
cv::imshow("detection", batch_frames[0]);
cv::waitKey(0);
}
......
......@@ -73,9 +73,9 @@ public:
CenternetDetection() {};
~CenternetDetection() {};
bool init(const std::string& tensor_path, const int n_classes=80);
void preprocess(cv::Mat &frame);
void postprocess(const bool mAP=false);
bool init(const std::string& tensor_path, const int n_classes=80, const int n_batches=1);
void preprocess(cv::Mat &frame, const int bi=0);
void postprocess(const int bi=0, const bool mAP=false);
};
......
......@@ -34,6 +34,8 @@ class DetectionNN {
cv::Scalar colors[256];
int nBatches = 1;
#ifdef OPENCV_CUDACONTRIB
cv::cuda::GpuMat bgr[3];
cv::cuda::GpuMat imagePreproc;
......@@ -47,21 +49,26 @@ class DetectionNN {
* This method preprocesses the image before feeding it to the NN.
*
* @param frame original frame to adapt for inference.
* @param bi batch index
*/
virtual void preprocess(cv::Mat &frame) = 0;
virtual void preprocess(cv::Mat &frame, const int bi=0) = 0;
/**
* This method postprocesses the output of the NN to obtain the correct
* bounding boxes.
*
* @param bi batch index
* @param mAP set to true only if all the probabilities for a bounding
* box are needed, as in some cases for the mAP calculation
*/
virtual void postprocess(const bool mAP=false) = 0;
virtual void postprocess(const int bi=0, const bool mAP=false) = 0;
public:
int classes = 0;
float confThreshold = 0.05; /*threshold on the confidence of the boxes*/
float confThreshold = 0.3; /*threshold on the confidence of the boxes*/
std::vector<tk::dnn::box> detected; /*bounding boxes in output*/
std::vector<std::vector<tk::dnn::box>> batchDetected; /*bounding boxes in output*/
std::vector<double> stats; /*keeps track of inference times (ms)*/
std::vector<std::string> classesNames;
......@@ -74,36 +81,41 @@ class DetectionNN {
*
* @param tensor_path path to the rt file of the NN.
* @param n_classes number of classes for the given dataset.
* @param n_batches number of batches to use in inference
* @return true if everything is correct, false otherwise.
*/
virtual bool init(const std::string& tensor_path, const int n_classes=80) = 0;
virtual bool init(const std::string& tensor_path, const int n_classes=80, const int n_batches=1) = 0;
/**
* This method performs the whole detection of the NN.
*
* @param frame frame to run detection on.
* @param frames frames to run detection on.
* @param save_times if set to true, preprocess, inference and postprocess times
* are saved on a csv file, otherwise not.
* @param times pointer to the output stream where to write times
* @param mAP set to true only if all the probabilities for a bounding
* box are needed, as in some cases for the mAP calculation
*/
void update(cv::Mat &frame, bool save_times=false, std::ofstream *times=nullptr, const bool mAP=false){
if(!frame.data)
FatalError("No image data feed to detection");
void update(std::vector<cv::Mat>& frames, bool save_times=false, std::ofstream *times=nullptr, const bool mAP=false){
if(save_times && times==nullptr)
FatalError("save_times set to true, but no valid ofstream given");
originalSize = frame.size();
printCenteredTitle(" TENSORRT detection ", '=', 30);
{
TIMER_START
preprocess(frame);
for(int bi=0; bi<nBatches;++bi){
if(!frames[bi].data)
FatalError("No image data feed to detection");
originalSize = frames[bi].size();
preprocess(frames[bi], bi);
}
TIMER_STOP
if(save_times) *times<<t_ns<<";";
}
//do inference
tk::dnn::dataDim_t dim = netRT->input_dim;
dim.n = nBatches;
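// with dim.n set, a single inference call processes all nBatches images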
{
dim.print();
TIMER_START
......@@ -114,9 +126,11 @@ class DetectionNN {
if(save_times) *times<<t_ns<<";";
}
batchDetected.clear();
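// each postprocess(bi) call appends the detections of image bi to batchDetected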
{
TIMER_START
postprocess(mAP);
for(int bi=0; bi<nBatches;++bi)
postprocess(bi, mAP);
TIMER_STOP
if(save_times) *times<<t_ns<<"\n";
}
......@@ -125,10 +139,9 @@ class DetectionNN {
/**
* Method to draw bounding boxes and labels on a frame.
*
* @param frame original frame to draw bounding box on.
* @return frame with bounding boxes.
* @param frames original frames to draw bounding boxes on.
*/
cv::Mat draw(cv::Mat &frame) {
void draw(std::vector<cv::Mat>& frames) {
tk::dnn::box b;
int x0, w, x1, y0, h, y1;
int objClass;
......@@ -137,24 +150,26 @@ class DetectionNN {
int baseline = 0;
float font_scale = 0.5;
int thickness = 2;
// draw dets
for(int i=0; i<detected.size(); i++) {
b = detected[i];
x0 = b.x;
x1 = b.x + b.w;
y0 = b.y;
y1 = b.y + b.h;
det_class = classesNames[b.cl];
// draw rectangle
cv::rectangle(frame, cv::Point(x0, y0), cv::Point(x1, y1), colors[b.cl], 2);
// draw label
cv::Size text_size = getTextSize(det_class, cv::FONT_HERSHEY_SIMPLEX, font_scale, thickness, &baseline);
cv::rectangle(frame, cv::Point(x0, y0), cv::Point((x0 + text_size.width - 2), (y0 - text_size.height - 2)), colors[b.cl], -1);
cv::putText(frame, det_class, cv::Point(x0, (y0 - (baseline / 2))), cv::FONT_HERSHEY_SIMPLEX, font_scale, cv::Scalar(255, 255, 255), thickness);
for(int bi=0; bi<frames.size(); ++bi){
// draw dets
for(int i=0; i<batchDetected[bi].size(); i++) {
b = batchDetected[bi][i];
x0 = b.x;
x1 = b.x + b.w;
y0 = b.y;
y1 = b.y + b.h;
det_class = classesNames[b.cl];
// draw rectangle
cv::rectangle(frames[bi], cv::Point(x0, y0), cv::Point(x1, y1), colors[b.cl], 2);
// draw label
cv::Size text_size = getTextSize(det_class, cv::FONT_HERSHEY_SIMPLEX, font_scale, thickness, &baseline);
cv::rectangle(frames[bi], cv::Point(x0, y0), cv::Point((x0 + text_size.width - 2), (y0 - text_size.height - 2)), colors[b.cl], -1);
cv::putText(frames[bi], det_class, cv::Point(x0, (y0 - (baseline / 2))), cv::FONT_HERSHEY_SIMPLEX, font_scale, cv::Scalar(255, 255, 255), thickness);
}
}
return frame;
}
};
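Putting this interface together, a minimal batched detection loop could look like the following sketch (assumptions: a Yolo3Detection network, an rt file generated with TKDNN_BATCHSIZE=2, and illustrative file and header names):
```
#include <opencv2/opencv.hpp>
#include "Yolo3Detection.h"   // assumed header for tk::dnn::Yolo3Detection

int main() {
    const int n_batch = 2;    // must match the TKDNN_BATCHSIZE used to build the rt file
    tk::dnn::Yolo3Detection detNN;
    detNN.init("yolo3_fp32.rt", 80, n_batch);

    cv::VideoCapture cap("../demo/yolo_test.mp4");
    std::vector<cv::Mat> batch_frame, batch_dnn_input;
    cv::Mat frame;
    while(true) {
        batch_frame.clear();
        batch_dnn_input.clear();
        for(int bi = 0; bi < n_batch && cap.read(frame); ++bi) {
            batch_frame.push_back(frame.clone());     // kept as-is for drawing
            batch_dnn_input.push_back(frame.clone()); // resized in place by preprocess
        }
        if((int)batch_dnn_input.size() < n_batch)
            break;                          // drop a partial batch, as demo.cpp does
        detNN.update(batch_dnn_input);      // preprocess + inference + postprocess
        detNN.draw(batch_frame);            // boxes of image bi drawn on batch_frame[bi]
    }
    return 0;
}
```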
......
......@@ -65,9 +65,9 @@ public:
MobilenetDetection() {};
~MobilenetDetection() {};
bool init(const std::string& tensor_path, const int n_classes);
void preprocess(cv::Mat &frame);
void postprocess(const bool mAP=false);
bool init(const std::string& tensor_path, const int n_classes, const int n_batches=1);
void preprocess(cv::Mat &frame, const int bi=0);
void postprocess(const int bi=0, const bool mAP=false);
};
......
......@@ -24,9 +24,9 @@ public:
Yolo3Detection() {};
~Yolo3Detection() {};
bool init(const std::string& tensor_path, const int n_classes=80);
void preprocess(cv::Mat &frame);
void postprocess(const bool mAP=false);
bool init(const std::string& tensor_path, const int n_classes=80, const int n_batches=1);
void preprocess(cv::Mat &frame, const int bi=0);
void postprocess(const int bi=0, const bool mAP=false);
};
......
......@@ -3,10 +3,11 @@
namespace tk { namespace dnn {
bool CenternetDetection::init(const std::string& tensor_path, const int n_classes){
bool CenternetDetection::init(const std::string& tensor_path, const int n_classes, const int n_batches){
std::cout<<(tensor_path).c_str()<<"\n";
netRT = new tk::dnn::NetworkRT(NULL, (tensor_path).c_str() );
classes = n_classes;
nBatches = n_batches;
dim = netRT->input_dim;
......@@ -41,7 +42,7 @@ bool CenternetDetection::init(const std::string& tensor_path, const int n_classe
trans = cv::Mat(cv::Size(3,2), CV_32F);
trans2 = cv::Mat(cv::Size(3,2), CV_32F);
checkCuda(cudaMalloc(&input_d, sizeof(dnnType)*netRT->input_dim.tot()));
checkCuda(cudaMalloc(&input_d, sizeof(dnnType)*netRT->input_dim.tot() * nBatches));
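// input_d is a single contiguous buffer: the image of batch bi starts at offset input_dim.tot()*bi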
dim_hm = tk::dnn::dataDim_t(1, 80, 128, 128, 1);
dim_wh = tk::dnn::dataDim_t(1, 2, 128, 128, 1);
......@@ -98,7 +99,7 @@ bool CenternetDetection::init(const std::string& tensor_path, const int n_classe
checkCuda(cudaMemcpy(mean_d, mean, 3*sizeof(float), cudaMemcpyHostToDevice));
checkCuda(cudaMemcpy(stddev_d, stddev, 3*sizeof(float), cudaMemcpyHostToDevice));
#else
checkCuda(cudaMallocHost(&input, sizeof(dnnType)*netRT->input_dim.tot()));
checkCuda(cudaMallocHost(&input, sizeof(dnnType)*netRT->input_dim.tot()* nBatches));
mean << 0.408, 0.447, 0.47;
stddev << 0.289, 0.274, 0.278;
#endif
......@@ -120,7 +121,7 @@ bool CenternetDetection::init(const std::string& tensor_path, const int n_classe
}
void CenternetDetection::preprocess(cv::Mat &frame){
void CenternetDetection::preprocess(cv::Mat &frame, const int bi){
// -----------------------------------pre-process ------------------------------------------
// auto start_t = std::chrono::steady_clock::now();
......@@ -212,7 +213,7 @@ void CenternetDetection::preprocess(cv::Mat &frame){
// std::cout << " TIME normalize: " << std::chrono::duration_cast<std::chrono:: microseconds>(end_t - step_t).count() << " us" << std::endl;
// step_t = end_t;
checkCuda(cudaMemcpy(input_d, d_ptrs, dim2.tot()*sizeof(dnnType), cudaMemcpyDeviceToDevice));
checkCuda(cudaMemcpy(input_d+ netRT->input_dim.tot()*bi, d_ptrs, dim2.tot()*sizeof(dnnType), cudaMemcpyDeviceToDevice));
// end_t = std::chrono::steady_clock::now();
// std::cout << " TIME Memcpy to input_d: " << std::chrono::duration_cast<std::chrono:: microseconds>(end_t - step_t).count() << " us" << std::endl;
......@@ -254,18 +255,18 @@ void CenternetDetection::preprocess(cv::Mat &frame){
int idx = i*imageF.rows*imageF.cols;
int ch = dim2.c-3 +i;
// std::cout<<"i: "<<i<<", idx: "<<idx<<", ch: "<<ch<<std::endl;
memcpy((void*)&input[idx], (void*)bgr[ch].data, imageF.rows*imageF.cols*sizeof(dnnType));
memcpy((void*)&input[idx+ netRT->input_dim.tot()*bi], (void*)bgr[ch].data, imageF.rows*imageF.cols*sizeof(dnnType));
}
checkCuda(cudaMemcpyAsync(input_d, input, dim2.tot()*sizeof(dnnType), cudaMemcpyHostToDevice));
checkCuda(cudaMemcpyAsync(input_d+ netRT->input_dim.tot()*bi, input+ netRT->input_dim.tot()*bi, dim2.tot()*sizeof(dnnType), cudaMemcpyHostToDevice));
#endif
}
void CenternetDetection::postprocess(const bool mAP){
void CenternetDetection::postprocess(const int bi, const bool mAP){
dnnType *rt_out[4];
rt_out[0] = (dnnType *)netRT->buffersRT[1];
rt_out[1] = (dnnType *)netRT->buffersRT[2];
rt_out[2] = (dnnType *)netRT->buffersRT[3];
rt_out[3] = (dnnType *)netRT->buffersRT[4];
rt_out[0] = (dnnType *)netRT->buffersRT[1]+ netRT->buffersDIM[0].tot()*bi;
rt_out[1] = (dnnType *)netRT->buffersRT[2]+ netRT->buffersDIM[1].tot()*bi;
rt_out[2] = (dnnType *)netRT->buffersRT[3]+ netRT->buffersDIM[2].tot()*bi;
rt_out[3] = (dnnType *)netRT->buffersRT[4]+ netRT->buffersDIM[3].tot()*bi;
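// the output buffers hold nBatches results back to back: buffersDIM[i].tot()*bi selects the slice of image bi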
// auto start_t = std::chrono::steady_clock::now();
// auto step_t = std::chrono::steady_clock::now();
......@@ -389,6 +390,7 @@ void CenternetDetection::postprocess(const bool mAP){
}
}
batchDetected.push_back(detected);
// end_t = std::chrono::steady_clock::now();
// std::cout << " TIME detections: " << std::chrono::duration_cast<std::chrono:: microseconds>(end_t - step_t).count() << " us" << std::endl;
// step_t = end_t;
......
......@@ -126,11 +126,12 @@ float MobilenetDetection::iou(const tk::dnn::box &a, const tk::dnn::box &b){
return iou;
}
bool MobilenetDetection::init(const std::string& tensor_path, const int n_classes){
bool MobilenetDetection::init(const std::string& tensor_path, const int n_classes, const int n_batches){
std::cout<<(tensor_path).c_str()<<"\n";
netRT = new tk::dnn::NetworkRT(NULL, (tensor_path).c_str());
imageSize = netRT->input_dim.h;
classes = n_classes;
nBatches = n_batches;
SSDSpec specs[N_SSDSPEC];
......@@ -157,9 +158,9 @@ bool MobilenetDetection::init(const std::string& tensor_path, const int n_classe
generate_ssd_priors(specs, N_SSDSPEC);
#ifndef OPENCV_CUDACONTRIB
checkCuda(cudaMallocHost(&input, sizeof(dnnType) * netRT->input_dim.tot()));
checkCuda(cudaMallocHost(&input, sizeof(dnnType) * netRT->input_dim.tot() * nBatches));
#endif
checkCuda(cudaMalloc(&input_d, sizeof(dnnType) * netRT->input_dim.tot()));
checkCuda(cudaMalloc(&input_d, sizeof(dnnType) * netRT->input_dim.tot() * nBatches));
locations_h = (float *)malloc(N_COORDS * nPriors * sizeof(float));
confidences_h = (float *)malloc(nPriors * classes * sizeof(float));
......@@ -208,7 +209,7 @@ bool MobilenetDetection::init(const std::string& tensor_path, const int n_classe
return 1;
}
void MobilenetDetection::preprocess(cv::Mat &frame){
void MobilenetDetection::preprocess(cv::Mat &frame, const int bi){
#ifdef OPENCV_CUDACONTRIB
//move original image on GPU
cv::cuda::GpuMat orig_img, frame_nomean;
......@@ -224,7 +225,7 @@ void MobilenetDetection::preprocess(cv::Mat &frame){
for(int i=0; i < netRT->input_dim.c; i++){
int idx = i * imagePreproc.rows * imagePreproc.cols;
checkCuda( cudaMemcpy((void *)&input_d[idx], (void *)bgr[i].data, imagePreproc.rows * imagePreproc.cols* sizeof(float), cudaMemcpyDeviceToDevice) );
checkCuda( cudaMemcpy((void *)&input_d[idx + netRT->input_dim.tot()*bi], (void *)bgr[i].data, imagePreproc.rows * imagePreproc.cols* sizeof(float), cudaMemcpyDeviceToDevice) );
}
#else
//resize image, remove mean, divide by std
......@@ -237,17 +238,17 @@ void MobilenetDetection::preprocess(cv::Mat &frame){
cv::split(imagePreproc, bgr);
for (int i = 0; i < netRT->input_dim.c; i++){
int idx = i * imagePreproc.rows * imagePreproc.cols;
memcpy((void *)&input[idx], (void *)bgr[i].data, imagePreproc.rows * imagePreproc.cols * sizeof(dnnType));
memcpy((void *)&input[idx + netRT->input_dim.tot()*bi], (void *)bgr[i].data, imagePreproc.rows * imagePreproc.cols * sizeof(dnnType));
}
checkCuda(cudaMemcpyAsync(input_d, input, netRT->input_dim.tot() * sizeof(dnnType), cudaMemcpyHostToDevice, netRT->stream));
checkCuda(cudaMemcpyAsync(input_d+ netRT->input_dim.tot()*bi, input + netRT->input_dim.tot()*bi, netRT->input_dim.tot() * sizeof(dnnType), cudaMemcpyHostToDevice, netRT->stream));
#endif
}
void MobilenetDetection::postprocess(const bool mAP){
void MobilenetDetection::postprocess(const int bi, const bool mAP){
//get confidences and locations_h
dnnType *rt_out[2];
rt_out[0] = (dnnType *)netRT->buffersRT[3];
rt_out[1] = (dnnType *)netRT->buffersRT[4];
rt_out[0] = (dnnType *)netRT->buffersRT[3]+ netRT->buffersDIM[3].tot()*bi;
rt_out[1] = (dnnType *)netRT->buffersRT[4]+ netRT->buffersDIM[4].tot()*bi;
detected.clear();
......@@ -302,6 +303,7 @@ void MobilenetDetection::postprocess(const bool mAP){
boxes = remaining;
}
}
batchDetected.push_back(detected);
}
......
......@@ -3,12 +3,16 @@
namespace tk { namespace dnn {
bool Yolo3Detection::init(const std::string& tensor_path, const int n_classes) {
bool Yolo3Detection::init(const std::string& tensor_path, const int n_classes, const int n_batches) {
//convert network to tensorRT
std::cout<<(tensor_path).c_str()<<"\n";
netRT = new tk::dnn::NetworkRT(NULL, (tensor_path).c_str() );
nBatches = n_batches;
tk::dnn::dataDim_t idim = netRT->input_dim;
idim.n = nBatches;
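// idim.tot() now includes the batch dimension, so one allocation covers all batches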
if(netRT->pluginFactory->n_yolos < 2 ) {
FatalError("this is not yolo3");
}
......@@ -19,7 +23,7 @@ bool Yolo3Detection::init(const std::string& tensor_path, const int n_classes) {
num = yRT->num;
nMasks = yRT->n_masks;
// make a yolo layer for interpret predictions
// make a yolo layer to interpret predictions
yolo[i] = new tk::dnn::Yolo(nullptr, classes, nMasks, ""); // yolo without input and bias
yolo[i]->mask_h = new dnnType[nMasks];
yolo[i]->bias_h = new dnnType[num*nMasks*2];
......@@ -31,9 +35,9 @@ bool Yolo3Detection::init(const std::string& tensor_path, const int n_classes) {
dets = tk::dnn::Yolo::allocateDetections(tk::dnn::Yolo::MAX_DETECTIONS, classes);
#ifndef OPENCV_CUDACONTRIB
checkCuda(cudaMallocHost(&input, sizeof(dnnType)*netRT->input_dim.tot()));
checkCuda(cudaMallocHost(&input, sizeof(dnnType)*idim.tot()));
#endif
checkCuda(cudaMalloc(&input_d, sizeof(dnnType)*netRT->input_dim.tot()));
checkCuda(cudaMalloc(&input_d, sizeof(dnnType)*idim.tot()));
// class colors precompute
for(int c=0; c<classes; c++) {
......@@ -48,7 +52,7 @@ bool Yolo3Detection::init(const std::string& tensor_path, const int n_classes) {
return true;
}
void Yolo3Detection::preprocess(cv::Mat &frame){
void Yolo3Detection::preprocess(cv::Mat &frame, const int bi){
#ifdef OPENCV_CUDACONTRIB
cv::cuda::GpuMat orig_img, img_resized;
orig_img = cv::cuda::GpuMat(frame);
......@@ -64,7 +68,7 @@ void Yolo3Detection::preprocess(cv::Mat &frame){
int size = imagePreproc.rows * imagePreproc.cols;
int ch = netRT->input_dim.c-1 -i;
bgr[ch].download(bgr_h); //TODO: don't copy back on CPU
checkCuda( cudaMemcpy(input_d + i*size, (float*)bgr_h.data, size*sizeof(dnnType), cudaMemcpyHostToDevice));
checkCuda( cudaMemcpy(input_d + i*size + netRT->input_dim.tot()*bi, (float*)bgr_h.data, size*sizeof(dnnType), cudaMemcpyHostToDevice));