From 5cb97ff0174cc1c55370db8fbe1099ec8b027fdb Mon Sep 17 00:00:00 2001 From: lindsayshuo <932695342@qq.com> Date: Tue, 14 May 2024 13:48:38 +0800 Subject: [PATCH] Change grids in forwardGpu to one-dimensional arrays and fix p2 model download link (#1524) * Add the generation of multi-class pose engines * Change grids in forwardGpu to one-dimensional arrays * Update README.md --------- Co-authored-by: lindsayshuo --- yolov8/README.md | 2 +- yolov8/plugin/yololayer.cu | 29 +++++++++++++++++++++-------- 2 files changed, 22 insertions(+), 9 deletions(-) mode change 100755 => 100644 yolov8/plugin/yololayer.cu diff --git a/yolov8/README.md b/yolov8/README.md index 4d4f683b..84868bc5 100644 --- a/yolov8/README.md +++ b/yolov8/README.md @@ -42,7 +42,7 @@ python gen_wts.py -w yolov8n.pt -o yolov8n.wts -t detect // For p2 model -// download https://github.com/lindsayshuo/yolov8-p2/releases/download/VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.pt (only for 10 cls p2 model) +// download https://github.com/lindsayshuo/yolov8_p2_tensorrtx/releases/download/VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last/VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.pt (only for 10 cls p2 model) python gen_wts.py -w VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.pt -o VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.wts -t detect (only for 10 cls p2 model) // a file 'VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.wts' will be generated. ``` diff --git a/yolov8/plugin/yololayer.cu b/yolov8/plugin/yololayer.cu old mode 100755 new mode 100644 index 592914fa..c42b841c --- a/yolov8/plugin/yololayer.cu +++ b/yolov8/plugin/yololayer.cu @@ -258,25 +258,38 @@ void YoloLayerPlugin::forwardGpu(const float* const* inputs, float* output, cuda } int numElem = 0; - const int maxGrids = mStridesLength; - int grids[maxGrids][2]; + // const int maxGrids = mStridesLength; + // int grids[maxGrids][2]; + // for (int i = 0; i < maxGrids; ++i) { + // grids[i][0] = mYoloV8netHeight / mStrides[i]; + // grids[i][1] = mYoloV8NetWidth / mStrides[i]; + // } + + int maxGrids = mStridesLength; + int flatGridsLen = 2 * maxGrids; + int* flatGrids = new int[flatGridsLen]; + for (int i = 0; i < maxGrids; ++i) { - grids[i][0] = mYoloV8netHeight / mStrides[i]; - grids[i][1] = mYoloV8NetWidth / mStrides[i]; + flatGrids[2 * i] = mYoloV8netHeight / mStrides[i]; + flatGrids[2 * i + 1] = mYoloV8NetWidth / mStrides[i]; } for (unsigned int i = 0; i < maxGrids; i++) { - int grid_h = grids[i][0]; - int grid_w = grids[i][1]; + // Access the elements of the original 2D array from the flattened 1D array + int grid_h = flatGrids[2 * i]; // Corresponds to the access of grids[i][0] + int grid_w = flatGrids[2 * i + 1]; // Corresponds to the access of grids[i][1] int stride = mStrides[i]; - numElem = grid_h * grid_w * batchSize; - if (numElem < mThreadCount) + numElem = grid_h * grid_w * batchSize; // Calculate the total number of elements + if (numElem < mThreadCount) // Adjust the thread count if needed mThreadCount = numElem; + // The CUDA kernel call remains unchanged CalDetection<<<(numElem + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream>>>( inputs[i], output, numElem, mMaxOutObject, grid_h, grid_w, stride, mClassCount, mNumberofpoints, mConfthreshkeypoints, outputElem, is_segmentation_, is_pose_); } + + delete[] flatGrids; } PluginFieldCollection YoloPluginCreator::mFC{};