Skip to content

Commit

Permalink
Change grids in forwardGpu to one-dimensional arrays and fix p2 model download link (#1524)
Browse files Browse the repository at this point in the history

* Add the generation of multi-class pose engines

* Change grids in forwardGpu to one-dimensional arrays

* Update README.md

---------

Co-authored-by: lindsayshuo <[email protected]>
  • Loading branch information
lindsayshuo and lindsayshuo authored May 14, 2024
1 parent e555c9f commit 5cb97ff
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 9 deletions.
2 changes: 1 addition & 1 deletion yolov8/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ python gen_wts.py -w yolov8n.pt -o yolov8n.wts -t detect
// For p2 model
// download https://github.com/lindsayshuo/yolov8-p2/releases/download/VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.pt (only for 10 cls p2 model)
// download https://github.com/lindsayshuo/yolov8_p2_tensorrtx/releases/download/VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last/VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.pt (only for 10 cls p2 model)
python gen_wts.py -w VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.pt -o VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.wts -t detect (only for 10 cls p2 model)
// a file 'VisDrone_train_yolov8x_p2_bs1_epochs_100_imgsz_1280_last.wts' will be generated.
```
Expand Down
29 changes: 21 additions & 8 deletions yolov8/plugin/yololayer.cu
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -258,25 +258,38 @@ void YoloLayerPlugin::forwardGpu(const float* const* inputs, float* output, cuda
}
int numElem = 0;

const int maxGrids = mStridesLength;
int grids[maxGrids][2];
// const int maxGrids = mStridesLength;
// int grids[maxGrids][2];
// for (int i = 0; i < maxGrids; ++i) {
// grids[i][0] = mYoloV8netHeight / mStrides[i];
// grids[i][1] = mYoloV8NetWidth / mStrides[i];
// }

int maxGrids = mStridesLength;
int flatGridsLen = 2 * maxGrids;
int* flatGrids = new int[flatGridsLen];

for (int i = 0; i < maxGrids; ++i) {
grids[i][0] = mYoloV8netHeight / mStrides[i];
grids[i][1] = mYoloV8NetWidth / mStrides[i];
flatGrids[2 * i] = mYoloV8netHeight / mStrides[i];
flatGrids[2 * i + 1] = mYoloV8NetWidth / mStrides[i];
}

for (unsigned int i = 0; i < maxGrids; i++) {
int grid_h = grids[i][0];
int grid_w = grids[i][1];
// Access the elements of the original 2D array from the flattened 1D array
int grid_h = flatGrids[2 * i]; // Corresponds to the access of grids[i][0]
int grid_w = flatGrids[2 * i + 1]; // Corresponds to the access of grids[i][1]
int stride = mStrides[i];
numElem = grid_h * grid_w * batchSize;
if (numElem < mThreadCount)
numElem = grid_h * grid_w * batchSize; // Calculate the total number of elements
if (numElem < mThreadCount) // Adjust the thread count if needed
mThreadCount = numElem;

// The CUDA kernel call remains unchanged
CalDetection<<<(numElem + mThreadCount - 1) / mThreadCount, mThreadCount, 0, stream>>>(
inputs[i], output, numElem, mMaxOutObject, grid_h, grid_w, stride, mClassCount, mNumberofpoints,
mConfthreshkeypoints, outputElem, is_segmentation_, is_pose_);
}

delete[] flatGrids;
}

PluginFieldCollection YoloPluginCreator::mFC{};
Expand Down

0 comments on commit 5cb97ff

Please sign in to comment.