Commit 69256992 authored by Micaela Verucchi's avatar Micaela Verucchi
Browse files

Add colormap for each segmentation dataset



Signed-off-by: default avatarMicaela Verucchi <micaelaverucchi@gmail.com>
parent 84ff978c
......@@ -18,8 +18,8 @@ If you use tkDNN in your research, please cite the [following paper](https://iee
```
### What's new (20 July 2021)
- [x] Support to sematic segmentation [REAME](readme/README_seg.md)
- [] Support to TensorRT8 (WIP)
- [x] Support to sematic segmentation [REAME](docs/README_seg.md)
- [ ] Support to TensorRT8 (WIP)
## FPS Results
Inference FPS of yolov4 with tkDNN, average of 1200 images with the same dimension as the input size, on
......
......@@ -66,15 +66,6 @@ int main(int argc, char *argv[]) {
if(n_batch < 1 || n_batch > 64)
FatalError("Batch dim not supported");
std::string net_name;
removePathAndExtension(net, net_name);
bool mapillary_15_colormap = false;
if( n_classes == 15 &&
( net_name == "shelfnet_mapillary_fp32" ||
net_name == "shelfnet_mapillary_fp16" ||
net_name == "shelfnet_mapillary_int8" ) )
mapillary_15_colormap = true;
//net initialization
tk::dnn::SegmentationNN segNN;
segNN.init(net, n_classes, n_batch);
......@@ -130,7 +121,7 @@ int main(int argc, char *argv[]) {
width = frame.cols;
//inference
segNN.updateOriginal(frame, true, mapillary_15_colormap);
segNN.updateOriginal(frame, true);
if(show)
segNN.draw();
......
......@@ -39,14 +39,11 @@ where
* ```<show-flag>``` if set to 0 the demo will not show the visualization but save the video into result.mp4 (if n-batches ==1)
* ```<write-pred>``` if set to 0 (default) the demo will run, otherwise the evaluation of a dataset will run and the output of the segmentation will be saved. Attention: this is under development and paths are embedded, so change them in the code in advance.
N.b. By default it is used FP32 inference
<center>
NB) By default it is used FP32 inference
NB) The batching is not used to work on more streams, rather to work on more tiles of the same image. Shelfnet never resized the input image, therefore for images greater than 1024x1024 tiles of 1024x1024 are given in input to the network in batch.
![gif](output.gif "Results on yolo_test.mp4")
</center>
For other demo videos refer to [this playlist](https://www.youtube.com/playlist?list=PLv0nEQYDD45y5EdSiywwCGPBmJVUzIWwe).
......@@ -60,6 +57,22 @@ For other demo videos refer to [this playlist](https://www.youtube.com/playlist?
1. Zhuang, Juntang, et al. "ShelfNet for fast semantic segmentation." Proceedings of the IEEE International Conference on Computer Vision Workshops. 2019.
## FPS Results
Inference FPS of shelfnet with tkDNN, average of 1200 images on
* RTX 2080Ti (CUDA 10.2, TensorRT 7.0.0, Cudnn 7.6.5);
| Platform | Test | Phase | FP32, ms | FP32, FPS | FP16, ms | FP16, FPS | INT8, ms | INT8, FPS |
| :------: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: |
| RTX 2080Ti | shelfnet 1024x1024 (B=1) | pre | 6.11863 | 163.435 | 5.81465 | 171.979 | 5.88699 | 169.866 |
| RTX 2080Ti | shelfnet 1024x1024 (B=1) | inf | 11.5464 | 86.6074 | 7.35396 | 135.981 | 6.37623 | 156.832 |
| RTX 2080Ti | shelfnet 1024x1024 (B=1) | post | 4.09058 | 244.464 | 3.91961 | 255.128 | 4.07343 | 245.493 |
| RTX 2080Ti | shelfnet 1024x1024 (B=1) | tot | 21.7556 | 45.9652 | 17.0882 | 58.5199 | 16.3366 | 61.2121 |
| RTX 2080Ti | shelfnet 2048x2048 (B=4) | pre | 25.435 | 39.3158 | 25.2953 | 39.5331 | 25.9303 | 38.565 |
| RTX 2080Ti | shelfnet 2048x2048 (B=4) | inf | 36.5015 | 27.3961 | 17.0534 | 58.6395 | 15.6061 | 64.0773 |
| RTX 2080Ti | shelfnet 2048x2048 (B=4) | post | 17.3917 | 57.4985 | 17.1649 | 58.2583 | 17.5539 | 56.9675 |
| RTX 2080Ti | shelfnet 2048x2048 (B=4) | tot | 79.3283 | 12.6058 | 59.5136 | 16.8029 | 59.0903 | 16.9233 |
## Known issues
When creating the rt file all the checks returns errors. It is due to a different resize function and handling of the original ShelfNet outputs.
......
......@@ -5,8 +5,8 @@
namespace tk { namespace dnn {
cv::Mat vizFloat2colorMap(cv::Mat map, double min=0, double max=0, bool mapillary_15=false);
cv::Mat vizData2Mat(dnnType *dataInput, tk::dnn::dataDim_t dim, int img_h, int img_w, double min=0, double max=0, bool mapillary_15=false);
cv::Mat vizFloat2colorMap(cv::Mat map, double min=0, double max=0, int classes=19);
cv::Mat vizData2Mat(dnnType *dataInput, tk::dnn::dataDim_t dim, int img_h, int img_w, double min=0, double max=0, int classes=19);
cv::Mat vizLayer2Mat(tk::dnn::Network *net, int layer, int imgdim = 1000);
}}
......@@ -97,7 +97,7 @@ class SegmentationNN {
*
* @param bi batch index
*/
void postprocess(const int bi=0, bool appy_colormap = true, bool mapillary_15=false) {
void postprocess(const int bi=0, bool appy_colormap = true) {
dnnType *rt_out = (dnnType *)netRT->buffersRT[1]+ netRT->buffersDIM[1].tot()*bi;
dataDim_t odim = netRT->output_dim;
......@@ -112,7 +112,7 @@ class SegmentationNN {
cv::Mat colored;
if(appy_colormap)
colored = vizData2Mat(tmpOutData_h, vdim, netRT->input_dim.h, netRT->input_dim.w, 0, classes, mapillary_15);
colored = vizData2Mat(tmpOutData_h, vdim, netRT->input_dim.h, netRT->input_dim.w, 0, classes, classes);
else{
cv::Mat colored_fp32 (cv::Size(odim.w, odim.h),CV_32FC1, tmpOutData_h);
colored_fp32.convertTo(colored, CV_8UC1);
......@@ -234,7 +234,7 @@ class SegmentationNN {
}
}
void updateOriginal(cv::Mat frame, bool apply_colormap=true, bool mapillary_15=false){
void updateOriginal(cv::Mat frame, bool apply_colormap=true){
std::vector<cv::Mat> splitted_frames;
int H, W, net_H, net_W;
......@@ -347,7 +347,7 @@ class SegmentationNN {
cv::Mat colored;
if(apply_colormap)
colored = vizData2Mat(tmpOutData_h, vdim, netRT->input_dim.h, netRT->input_dim.w, 0, classes, mapillary_15);
colored = vizData2Mat(tmpOutData_h, vdim, netRT->input_dim.h, netRT->input_dim.w, 0, classes, classes);
else{
cv::Mat colored_fp32 (cv::Size(odim.w, odim.h),CV_32FC1, tmpOutData_h);
colored_fp32.convertTo(colored, CV_8UC1);
......
......@@ -12,62 +12,77 @@ cv::Mat mapillary_15_map(cv::Mat adjMap){
// cv::waitKey(0);
cv::Mat M1(1, 256, CV_8UC1), M2(1, 256, CV_8UC1), M3(1, 256, CV_8UC1);
//animal
M3.at<uchar>(0)=165;
M2.at<uchar>(0)=42;
M1.at<uchar>(0)=45;
//curb
M3.at<uchar>(1)=196;
M2.at<uchar>(1)=196;
M1.at<uchar>(1)=196;
//barrier
M3.at<uchar>(2)=90;
M2.at<uchar>(2)=120;
M1.at<uchar>(2)=150;
//road
M3.at<uchar>(3)=128;
M2.at<uchar>(3)=64;
M1.at<uchar>(3)=128;
//building
M3.at<uchar>(4)=70;
M2.at<uchar>(4)=70;
M1.at<uchar>(4)=70;
//person
M3.at<uchar>(5)=220;
M2.at<uchar>(5)=20;
M1.at<uchar>(5)=60;
//roadmark
M3.at<uchar>(6)=255;
M2.at<uchar>(6)=255;
M1.at<uchar>(6)=255;
//nature
M3.at<uchar>(7)=107;
M2.at<uchar>(7)=142;
M1.at<uchar>(7)=35;
//sky
M3.at<uchar>(8)=70;
M2.at<uchar>(8)=130;
M1.at<uchar>(8)=180;
//billboard
M3.at<uchar>(9)=220;
M2.at<uchar>(9)=220;
M1.at<uchar>(9)=220;
//pole
M3.at<uchar>(10)=153;
M2.at<uchar>(10)=153;
M1.at<uchar>(10)=153;
//traffic sign
M3.at<uchar>(11)=128;
M2.at<uchar>(11)=128;
M1.at<uchar>(11)=128;
//bike
M3.at<uchar>(12)=119;
M2.at<uchar>(12)=11;
M1.at<uchar>(12)=32;
//vehicle
M3.at<uchar>(13)=0;
M2.at<uchar>(13)=0;
M1.at<uchar>(13)=142;
//void
for(int i=14;i<256;i++)
{
M1.at<uchar>(i)=0;
......@@ -93,7 +108,256 @@ cv::Mat mapillary_15_map(cv::Mat adjMap){
}
cv::Mat vizFloat2colorMap(cv::Mat map,double min, double max, bool mapillary_15) {
cv::Mat berkeley_20_map(cv::Mat adjMap){
cv::Mat M1(1, 256, CV_8UC1), M2(1, 256, CV_8UC1), M3(1, 256, CV_8UC1);
//road
M3.at<uchar>(0)=128;
M2.at<uchar>(0)=64;
M1.at<uchar>(0)=128;
//sidewalk
M3.at<uchar>(1)=244;
M2.at<uchar>(1)=35;
M1.at<uchar>(1)=232;
//building
M3.at<uchar>(2)=70;
M2.at<uchar>(2)=70;
M1.at<uchar>(2)=70;
//wall
M3.at<uchar>(3)=102;
M2.at<uchar>(3)=102;
M1.at<uchar>(3)=156;
//fence
M3.at<uchar>(4)=90;
M2.at<uchar>(4)=120;
M1.at<uchar>(4)=150;
//pole
M3.at<uchar>(5)=153;
M2.at<uchar>(5)=153;
M1.at<uchar>(5)=153;
//traffic light
M3.at<uchar>(6)=250;
M2.at<uchar>(6)=170;
M1.at<uchar>(6)=30;
//traffic sign
M3.at<uchar>(7)=128;
M2.at<uchar>(7)=128;
M1.at<uchar>(7)=128;
//nature
M3.at<uchar>(8)=107;
M2.at<uchar>(8)=142;
M1.at<uchar>(8)=35;
//ground
M3.at<uchar>(9)=0;
M2.at<uchar>(9)=192;
M1.at<uchar>(9)=0;
//sky
M3.at<uchar>(10)=70;
M2.at<uchar>(10)=130;
M1.at<uchar>(10)=180;
//person
M3.at<uchar>(11)=220;
M2.at<uchar>(11)=20;
M1.at<uchar>(11)=60;
//rider
M3.at<uchar>(12)=255;
M2.at<uchar>(12)=0;
M1.at<uchar>(12)=100;
//car
M3.at<uchar>(13)=0;
M2.at<uchar>(13)=0;
M1.at<uchar>(13)=142;
//truck
M3.at<uchar>(14)=0;
M2.at<uchar>(14)=0;
M1.at<uchar>(14)=70;
//bus
M3.at<uchar>(15)=0;
M2.at<uchar>(15)=60;
M1.at<uchar>(15)=100;
//train
M3.at<uchar>(16)=0;
M2.at<uchar>(16)=0;
M1.at<uchar>(16)=192;
//motorbike
M3.at<uchar>(17)=0;
M2.at<uchar>(17)=0;
M1.at<uchar>(17)=230;
//bike
M3.at<uchar>(18)=119;
M2.at<uchar>(18)=11;
M1.at<uchar>(18)=32;
//void
for(int i=19;i<256;i++)
{
M1.at<uchar>(i)=0;
M2.at<uchar>(i)=0;
M3.at<uchar>(i)=0;
}
cv::Mat r1,r2,r3;
cv::LUT(adjMap,M1,r1);
cv::LUT(adjMap,M2,r2);
cv::LUT(adjMap,M3,r3);
std::vector<cv::Mat> planes;
planes.push_back(r1);
planes.push_back(r2);
planes.push_back(r3);
cv::Mat dst;
cv::merge(planes,dst);
return dst;
}
cv::Mat cityscapes_19_map(cv::Mat adjMap){
cv::Mat M1(1, 256, CV_8UC1), M2(1, 256, CV_8UC1), M3(1, 256, CV_8UC1);
//road
M3.at<uchar>(0)=128;
M2.at<uchar>(0)=64;
M1.at<uchar>(0)=128;
//sidewalk
M3.at<uchar>(1)=244;
M2.at<uchar>(1)=35;
M1.at<uchar>(1)=232;
//building
M3.at<uchar>(2)=70;
M2.at<uchar>(2)=70;
M1.at<uchar>(2)=70;
//wall
M3.at<uchar>(3)=102;
M2.at<uchar>(3)=102;
M1.at<uchar>(3)=156;
//fence
M3.at<uchar>(4)=190;
M2.at<uchar>(4)=153;
M1.at<uchar>(4)=153;
//pole
M3.at<uchar>(5)=153;
M2.at<uchar>(5)=153;
M1.at<uchar>(5)=153;
//traffic light
M3.at<uchar>(6)=250;
M2.at<uchar>(6)=170;
M1.at<uchar>(6)=30;
//traffic sign
M3.at<uchar>(7)=220;
M2.at<uchar>(7)=220;
M1.at<uchar>(7)=0;
//vegetation
M3.at<uchar>(8)=107;
M2.at<uchar>(8)=142;
M1.at<uchar>(8)=35;
//terrain
M3.at<uchar>(9)=152;
M2.at<uchar>(9)=251;
M1.at<uchar>(9)=152;
//sky
M3.at<uchar>(10)=70;
M2.at<uchar>(10)=130;
M1.at<uchar>(10)=180;
//person
M3.at<uchar>(11)=220;
M2.at<uchar>(11)=20;
M1.at<uchar>(11)=60;
//rider
M3.at<uchar>(12)=255;
M2.at<uchar>(12)=0;
M1.at<uchar>(12)=0;
//car
M3.at<uchar>(13)=0;
M2.at<uchar>(13)=0;
M1.at<uchar>(13)=142;
//truck
M3.at<uchar>(14)=0;
M2.at<uchar>(14)=0;
M1.at<uchar>(14)=70;
//bus
M3.at<uchar>(15)=0;
M2.at<uchar>(15)=60;
M1.at<uchar>(15)=100;
//train
M3.at<uchar>(16)=0;
M2.at<uchar>(16)=80;
M1.at<uchar>(16)=100;
//motorcycle
M3.at<uchar>(17)=0;
M2.at<uchar>(17)=0;
M1.at<uchar>(17)=230;
//bicycle
M3.at<uchar>(18)=119;
M2.at<uchar>(18)=11;
M1.at<uchar>(18)=32;
//void
for(int i=19;i<256;i++)
{
M1.at<uchar>(i)=0;
M2.at<uchar>(i)=0;
M3.at<uchar>(i)=0;
}
cv::Mat r1,r2,r3;
cv::LUT(adjMap,M1,r1);
cv::LUT(adjMap,M2,r2);
cv::LUT(adjMap,M3,r3);
std::vector<cv::Mat> planes;
planes.push_back(r1);
planes.push_back(r2);
planes.push_back(r3);
cv::Mat dst;
cv::merge(planes,dst);
return dst;
}
cv::Mat vizFloat2colorMap(cv::Mat map,double min, double max, int classes) {
if(min == 0 && max == 0)
cv::minMaxIdx(map, &min, &max);
......@@ -101,11 +365,22 @@ cv::Mat vizFloat2colorMap(cv::Mat map,double min, double max, bool mapillary_15)
cv::Mat adjMap;
cv::Mat falseColorsMap;
if(mapillary_15){
switch (classes)
{
case 15:
map.convertTo(adjMap,CV_8UC1);
falseColorsMap = mapillary_15_map(adjMap);
}
else{
break;
case 20:
map.convertTo(adjMap,CV_8UC1);
falseColorsMap = berkeley_20_map(adjMap);
break;
case 19:
map.convertTo(adjMap,CV_8UC1);
falseColorsMap = cityscapes_19_map(adjMap);
break;
default:
// expand your range to 0..255. Similar to histEq();
map.convertTo(adjMap,CV_8UC1, 255 / (max-min), -min);
applyColorMap(adjMap, falseColorsMap, cv::COLORMAP_JET);
......@@ -113,7 +388,7 @@ cv::Mat vizFloat2colorMap(cv::Mat map,double min, double max, bool mapillary_15)
return falseColorsMap;
}
cv::Mat vizData2Mat(dnnType *dataInput, tk::dnn::dataDim_t dim, int img_h, int img_w, double min, double max, bool mapillary_15) {
cv::Mat vizData2Mat(dnnType *dataInput, tk::dnn::dataDim_t dim, int img_h, int img_w, double min, double max, int classes) {
dnnType *data = nullptr;
// copy to CPU
......@@ -129,7 +404,7 @@ cv::Mat vizData2Mat(dnnType *dataInput, tk::dnn::dataDim_t dim, int img_h, int i
cv::Mat grid = cv::Mat(gridSize, CV_8UC3, cv::Scalar(0));
for(int i=0; i<dim.c;i++) {
cv::Mat raw = vizFloat2colorMap(cv::Mat(cv::Size(dim.w, dim.h),CV_32FC1, data + dim.w*dim.h*i), min, max, mapillary_15);
cv::Mat raw = vizFloat2colorMap(cv::Mat(cv::Size(dim.w, dim.h),CV_32FC1, data + dim.w*dim.h*i), min, max, classes);
int r = i / gridDim;
int c = i - r * gridDim;
raw.copyTo(grid.rowRange(r*dim.h, r*dim.h + dim.h).colRange(c*dim.w, c*dim.w + dim.w));
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment