Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds enable_frame_num to the experimental video reader #5628

Merged
merged 5 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ template <typename Backend>
class VideoSample {
public:
Tensor<Backend> data_;
int label_;
int label_ = -1;
int first_frame_ = -1;
};

class VideoLoaderDecoderBase {
Expand All @@ -46,6 +47,7 @@ class VideoLoaderDecoderBase {
stride_(spec.GetArgument<int>("stride")),
step_(spec.GetArgument<int>("step")) {
has_labels_ = spec.TryGetRepeatedArgument(labels_, "labels");
has_frame_idx_ = spec.GetArgument<bool>("enable_frame_num");
DALI_ENFORCE(
!has_labels_ || labels_.size() == filenames_.size(),
make_string(
Expand All @@ -61,6 +63,7 @@ class VideoLoaderDecoderBase {
std::vector<std::string> filenames_;
std::vector<int> labels_;
bool has_labels_ = false;
bool has_frame_idx_ = false;

Index current_index_ = 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ void VideoLoaderDecoderCpu::ReadSample(VideoSample<CPUBackend> &sample) {
if (has_labels_) {
sample.label_ = labels_[sample_span.video_idx_];
}
if (has_frame_idx_) {
sample.first_frame_ = sample_span.start_;
}
}

Index VideoLoaderDecoderCpu::SizeImpl() {
Expand Down
21 changes: 18 additions & 3 deletions dali/operators/reader/video_reader_decoder_cpu_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ namespace dali {

VideoReaderDecoderCpu::VideoReaderDecoderCpu(const OpSpec &spec)
: DataReader<CPUBackend, VideoSampleCpu, VideoSampleCpu, true>(spec),
has_labels_(spec.HasArgument("labels")) {
has_labels_(spec.HasArgument("labels")),
has_frame_idx_(spec.GetArgument<bool>("enable_frame_num")) {
loader_ = InitLoader<VideoLoaderDecoderCpu>(spec);
this->SetInitialSnapshot();
}
Expand All @@ -32,16 +33,26 @@ void VideoReaderDecoderCpu::RunImpl(SampleWorkspace &ws) {
video_output.Copy(sample.data_);
video_output.SetSourceInfo(sample.data_.GetSourceInfo());

int out_index = 1;
if (has_labels_) {
auto &label_output = ws.Output<CPUBackend>(1);
auto &label_output = ws.Output<CPUBackend>(out_index);
label_output.Resize({}, DALIDataType::DALI_INT32);
label_output.mutable_data<int>()[0] = sample.label_;
out_index++;
}
if (has_frame_idx_) {
auto &frame_idx_output = ws.Output<CPUBackend>(out_index);
frame_idx_output.Resize({}, DALIDataType::DALI_INT32);
frame_idx_output.mutable_data<int>()[0] = sample.first_frame_;
out_index++;
}
}

namespace detail {
inline int VideoReaderDecoderOutputFn(const OpSpec &spec) {
return spec.HasArgument("labels") ? 2 : 1;
bool has_labels = spec.HasArgument("labels")
bool has_frame_num_output = spec.GetArgument<bool>("enable_frame_num");
return 1 + has_labels + has_frame_num_output;
}
} // namespace detail

Expand All @@ -68,6 +79,10 @@ even in the variable frame rate scenario.)code")
.AddArg("sequence_length",
R"code(Frames to load per sequence.)code",
DALI_INT32)
.AddOptionalArg("enable_frame_num",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
.AddOptionalArg("enable_frame_num",
.AddOptionalArg("enable_frame_idx",

?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I want to keep it consistent with the video reader .

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand and agree - nonetheless, it's unfortunate.

R"code(If set, returns the index of the first frame in the decoded sequence
as an additional output.)code",
false)
.AddOptionalArg("step",
R"code(Frame interval between each sequence.

Expand Down
1 change: 1 addition & 0 deletions dali/operators/reader/video_reader_decoder_cpu_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class VideoReaderDecoderCpu

private:
bool has_labels_ = false;
bool has_frame_idx_ = false;
};

} // namespace dali
Expand Down
66 changes: 45 additions & 21 deletions dali/operators/reader/video_reader_decoder_gpu_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ namespace dali {

VideoReaderDecoderGpu::VideoReaderDecoderGpu(const OpSpec &spec)
: DataReader<GPUBackend, VideoSampleGpu, VideoSampleGpu, true>(spec),
has_labels_(spec.HasArgument("labels")) {
has_labels_(spec.HasArgument("labels")),
has_frame_idx_(spec.GetArgument<bool>("enable_frame_num")) {
loader_ = InitLoader<VideoLoaderDecoderGpu>(spec);
this->SetInitialSnapshot();
}
Expand Down Expand Up @@ -50,14 +51,21 @@ bool VideoReaderDecoderGpu::SetupImpl(

output_desc[0] = { video_shape, DALI_UINT8 };

if (!has_labels_) {
return true;
int out_index = 1;
if (has_labels_) {
output_desc[out_index] = {
uniform_list_shape<1>(batch_size, {1}),
DALI_INT32
};
out_index++;
}
if (has_frame_idx_) {
output_desc[out_index] = {
uniform_list_shape<1>(batch_size, {1}),
DALI_INT32
};
out_index++;
}

output_desc[1] = {
uniform_list_shape<1>(batch_size, {1}),
DALI_INT32
};

return true;
}
Expand All @@ -80,23 +88,39 @@ void VideoReaderDecoderGpu::RunImpl(Workspace &ws) {
video_output.SetSourceInfo(sample_id, sample.data_.GetSourceInfo());
}

if (!has_labels_) {
return;
}
int out_index = 1;
if (has_labels_) {
auto &labels_output = ws.Output<GPUBackend>(out_index);
SmallVector<int, 32> labels_cpu;

auto &labels_output = ws.Output<GPUBackend>(1);
SmallVector<int, 32> labels_cpu;
for (int sample_id = 0; sample_id < batch_size; ++sample_id) {
auto &sample = GetSample(sample_id);
labels_cpu[sample_id] = sample.label_;
}

for (int sample_id = 0; sample_id < batch_size; ++sample_id) {
auto &sample = GetSample(sample_id);
labels_cpu[sample_id] = sample.label_;
MemCopy(
labels_output.AsTensor().raw_mutable_data(),
labels_cpu.data(),
batch_size * sizeof(DALI_INT32),
ws.stream());
out_index++;
}
if (has_frame_idx_) {
auto &frame_idx_output = ws.Output<GPUBackend>(out_index);
SmallVector<int, 32> frame_idx_output_cpu;

for (int sample_id = 0; sample_id < batch_size; ++sample_id) {
auto &sample = GetSample(sample_id);
frame_idx_output_cpu[sample_id] = sample.span_ ? sample.span_->start_ : -1;
}

MemCopy(
labels_output.AsTensor().raw_mutable_data(),
labels_cpu.data(),
batch_size * sizeof(DALI_INT32),
ws.stream());
MemCopy(
frame_idx_output.AsTensor().raw_mutable_data(),
frame_idx_output_cpu.data(),
batch_size * sizeof(DALI_INT32),
ws.stream());
out_index++;
}
}

DALI_REGISTER_OPERATOR(experimental__readers__Video, VideoReaderDecoderGpu, GPU);
Expand Down
1 change: 1 addition & 0 deletions dali/operators/reader/video_reader_decoder_gpu_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class VideoReaderDecoderGpu : public DataReader<GPUBackend, VideoSampleGpu, Vide

private:
bool has_labels_ = false;
bool has_frame_idx_ = false;
};

} // namespace dali
Expand Down
34 changes: 28 additions & 6 deletions dali/operators/reader/video_reader_decoder_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ class VideoReaderDecoderBaseTest : public VideoTestBase {
virtual void AssertFrame(
int frame_id, const uint8_t *frame, TestVideo &ground_truth) = 0;

template<typename Backend>
int GetFrameIdx(dali::TensorList<Backend> &device_frame_idx);

private:
template<typename Backend>
void RunTestImpl(
Expand Down Expand Up @@ -129,15 +132,15 @@ class VideoReaderDecoderBaseTest : public VideoTestBase {
.AddArg("device", backend)
.AddArg("sequence_length", sequence_length)
.AddArg("random_shuffle", true)
.AddArg("enable_frame_num", true)
.AddArg("initial_fill", cfr_videos_[0].NumFrames())
.AddArg(
"filenames",
std::vector<std::string>{cfr_videos_paths_[0]})
.AddOutput("frames", backend));

pipe.Build({{"frames", backend}});
.AddOutput("frames", backend)
.AddOutput("frame_idx", backend));

std::vector<int> expected_order = {29, 46, 33, 6, 37};
pipe.Build({{"frames", backend}, {"frame_idx", backend}});

int num_sequences = 5;

Expand All @@ -148,9 +151,10 @@ class VideoReaderDecoderBaseTest : public VideoTestBase {

auto &frame_video_output = ws.Output<Backend>(0);
const auto sample = frame_video_output.template tensor<uint8_t>(0);
int frame_idx = GetFrameIdx(ws.Output<Backend>(1));

// We want to access correct order, so we comapre only the first frame of the sequence
AssertFrame(expected_order[sequence_id], sample, ground_truth_video);
// We want to access correct order, so we compare only the first frame of the sequence
AssertFrame(frame_idx, sample, ground_truth_video);
}
}
};
Expand All @@ -168,6 +172,15 @@ void VideoReaderDecoderBaseTest::RunShuffleTest<dali::CPUBackend>() {
RunShuffleTestImpl<dali::CPUBackend>("cpu", dali::CPU_ONLY_DEVICE_ID);
}

template<>
int VideoReaderDecoderBaseTest::GetFrameIdx(
dali::TensorList<dali::CPUBackend> &device_frame_idx) {
const auto frame_idx = device_frame_idx.template tensor<int>(0);
int frame_idx_buffer = -1;
std::copy_n(frame_idx, 1, &frame_idx_buffer);
return frame_idx_buffer;
}

template<>
void VideoReaderDecoderBaseTest::RunTest<dali::GPUBackend>(
std::vector<std::string> &videos_paths,
Expand All @@ -181,6 +194,15 @@ void VideoReaderDecoderBaseTest::RunShuffleTest<dali::GPUBackend>() {
RunShuffleTestImpl<dali::GPUBackend>("gpu", 0);
}

template<>
int VideoReaderDecoderBaseTest::GetFrameIdx(
dali::TensorList<dali::GPUBackend> &device_frame_idx) {
const auto frame_idx = device_frame_idx.template tensor<int>(0);
int frame_idx_buffer = -1;
MemCopy(&frame_idx_buffer, frame_idx, sizeof(int));
return frame_idx_buffer;
}

class VideoReaderDecoderCpuTest : public VideoReaderDecoderBaseTest {
public:
void AssertLabel(const int *label, int ground_truth_label) override {
Expand Down
Loading