forked from NVIDIA/VideoProcessingFramework
-
Notifications
You must be signed in to change notification settings - Fork 0
/
SampleDecode.py
359 lines (307 loc) · 11.6 KB
/
SampleDecode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
#
# Copyright 2019 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Starting from Python 3.8 DLL search policy has changed.
# We need to add path to CUDA DLLs explicitly.
import PyNvCodec as nvc
import os
import tqdm
import argparse
from pathlib import Path
from enum import Enum
import numpy as np
import logging
logger = logging.getLogger(__file__)
# Starting from Python 3.8 the DLL search policy on Windows has changed, so
# the directories holding the CUDA DLLs have to be registered explicitly.
if os.name == "nt":
    # Add the directory named by the CUDA_PATH environment variable.
    # .get() is used so that a missing variable reaches the error branch
    # below instead of raising KeyError.
    cuda_path = os.environ.get("CUDA_PATH")
    if cuda_path:
        os.add_dll_directory(cuda_path)
    else:
        logger.error("CUDA_PATH environment variable is not set.")
        logger.error("Can't set CUDA DLLs search path.")
        raise SystemExit(1)

    # Add every existing directory from PATH as well for minor CUDA releases.
    sys_path = os.environ.get("PATH")
    if sys_path:
        for path in sys_path.split(os.pathsep):
            if os.path.isdir(path):
                os.add_dll_directory(path)
    else:
        logger.error("PATH environment variable is not set.")
        raise SystemExit(1)
class InitMode(Enum):
    """How the decoder obtains encoded packets."""

    # Decoder will be created with built-in demuxer.
    # NOTE: the value is a plain int; a stray trailing comma used to make it
    # the tuple (0,), which broke lookups such as InitMode(0).
    BUILTIN = 0
    # Decoder will be created with standalone FFmpeg VPF demuxer.
    STANDALONE = 1
class DecodeStatus(Enum):
    """Outcome of a single decode call."""

    # Decoding error.
    # NOTE: values are plain ints; stray trailing commas used to turn the
    # first two members into one-element tuples.
    DEC_ERR = 0
    # Frame was submitted to decoder.
    # No frames are ready for display yet.
    DEC_SUBM = 1
    # Frame was submitted to decoder.
    # There's a frame ready for display.
    DEC_READY = 2
class NvDecoder:
    """Decode an encoded video file with Nvdec and dump raw frames to a file.

    The decoder is created either on top of a standalone FFmpeg demuxer
    (``InitMode.STANDALONE``) or with the decoder's built-in demuxer (any
    other mode). Decoded frames are written to ``dec_file`` as raw bytes.

    The instance owns the output file handle; call :meth:`close` (or use the
    instance as a context manager) to release it.
    """

    def __init__(
        self,
        gpu_id: int,
        enc_file: str,
        dec_file: str,
        dmx_mode=InitMode.STANDALONE,
    ):
        """Create demuxer/decoder and open the output file.

        Args:
            gpu_id: GPU ordinal passed to ``nvc.PyNvDecoder``.
            enc_file: Path to the encoded input video.
            dec_file: Path of the raw output file (opened in ``"wb"`` mode).
            dmx_mode: Demuxer initialization mode.
        """
        # Save mode, we will need this later
        self.init_mode = dmx_mode

        if self.init_mode == InitMode.STANDALONE:
            # Initialize standalone demuxer.
            self.nv_dmx = nvc.PyFFmpegDemuxer(enc_file)
            # Initialize decoder from the stream parameters the demuxer reports.
            self.nv_dec = nvc.PyNvDecoder(
                self.nv_dmx.Width(),
                self.nv_dmx.Height(),
                self.nv_dmx.Format(),
                self.nv_dmx.Codec(),
                gpu_id,
            )
        else:
            # Initialize decoder with built-in demuxer.
            self.nv_dmx = None
            self.nv_dec = nvc.PyNvDecoder(enc_file, gpu_id)

        # Frame to seek to next time decoding function is called.
        # Negative values mean 'don't use seek'. Non-negative values mean
        # seek frame number.
        self.sk_frm = -1
        # Total amount of decoded frames
        self.num_frames_decoded = 0
        # Numpy array to store decoded frames pixels
        self.frame_nv12 = np.ndarray(shape=(0), dtype=np.uint8)
        # Encoded video packet
        self.packet = np.ndarray(shape=(0), dtype=np.uint8)
        # Encoded packet data
        self.packet_data = nvc.PacketData()
        # Seek mode
        self.seek_mode = nvc.SeekMode.PREV_KEY_FRAME
        # Seek criteria; only meaningful after seek() has been called.
        self.seek_criteria = None
        # Output file. Opened last so that a failure above does not leave
        # an open handle behind.
        self.out_file = open(dec_file, "wb")

    # Closes the output file. Calling it more than once is harmless.
    def close(self) -> None:
        self.out_file.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    # Returns the object that answers stream property queries: the
    # standalone demuxer when there is one, the decoder otherwise.
    def _stream_info(self):
        if self.mode() == InitMode.STANDALONE:
            return self.nv_dmx
        return self.nv_dec

    # Returns decoder creation mode
    def mode(self) -> InitMode:
        return self.init_mode

    # Returns video width in pixels
    def width(self) -> int:
        return self._stream_info().Width()

    # Returns video height in pixels
    def height(self) -> int:
        return self._stream_info().Height()

    # Returns number of decoded frames.
    def dec_frames(self) -> int:
        return self.num_frames_decoded

    # Returns frame rate
    def framerate(self) -> float:
        return self._stream_info().Framerate()

    # Returns average frame rate
    def avg_framerate(self) -> float:
        return self._stream_info().AvgFramerate()

    # Returns True if video has variable frame rate, False otherwise
    def is_vfr(self) -> bool:
        return self._stream_info().IsVFR()

    # Returns number of frames in video.
    def stream_num_frames(self) -> int:
        return self._stream_info().Numframes()

    # Seek for particular frame number.
    def seek(
        self,
        seek_frame: int,
        seek_mode: nvc.SeekMode,
        seek_criteria: nvc.SeekCriteria,
    ) -> None:
        # Next time we decode frame decoder will seek for this frame first.
        self.sk_frm = seek_frame
        self.seek_mode = seek_mode
        self.seek_criteria = seek_criteria
        # The decoded-frames counter restarts after a seek request.
        self.num_frames_decoded = 0

    def decode_frame_standalone(self, verbose=False) -> DecodeStatus:
        """Demux one packet with the standalone demuxer and feed it to Nvdec.

        Returns:
            ``DEC_READY`` if a decoded frame is available in ``frame_nv12``,
            ``DEC_SUBM`` if the packet was accepted but no frame is ready yet,
            ``DEC_ERR`` if seeking/demuxing failed. If an exception is raised
            it is logged and the status reached so far is returned.
        """
        status = DecodeStatus.DEC_ERR

        try:
            # Check if we need to seek first.
            if self.sk_frm >= 0:
                logger.info(f"Seeking for the frame {self.sk_frm}")
                seek_ctx = nvc.SeekContext(
                    int(self.sk_frm), self.seek_mode, self.seek_criteria)
                # The seek request is consumed whether or not it succeeds.
                self.sk_frm = -1

                if not self.nv_dmx.Seek(seek_ctx, self.packet):
                    return status

                logger.info(
                    f"We are at frame with pts {seek_ctx.out_frame_pts}")
            # Otherwise we just demux next packet.
            elif not self.nv_dmx.DemuxSinglePacket(self.packet):
                return status

            # Send encoded packet to Nvdec.
            # Nvdec is async so it may not return decoded frame immediately.
            frame_ready = self.nv_dec.DecodeFrameFromPacket(
                self.frame_nv12, self.packet)
            if frame_ready:
                self.num_frames_decoded += 1
                status = DecodeStatus.DEC_READY
            else:
                status = DecodeStatus.DEC_SUBM

            # Get last demuxed packet data.
            # It stores info such as pts, duration etc.
            self.nv_dmx.LastPacketData(self.packet_data)

            if verbose:
                logger.info(
                    f"frame pts (decode order)      :{self.packet_data.pts}")
                logger.info(
                    f"frame dts (decode order)      :{self.packet_data.dts}")
                logger.info(
                    f"frame pos (decode order)      :{self.packet_data.pos}")
                logger.info(
                    f"frame duration (decode order) :{self.packet_data.duration}")

        except Exception as e:
            # Logged at error level so the failure is visible even when
            # logging has not been configured for INFO output.
            logger.error(f"{getattr(e, 'message', str(e))}")

        return status

    def decode_frame_builtin(self, verbose=False) -> DecodeStatus:
        """Decode one frame using the decoder's built-in demuxer.

        Returns:
            ``DEC_READY`` if a decoded frame is available in ``frame_nv12``,
            ``DEC_ERR`` otherwise. If an exception is raised it is logged
            and the status reached so far is returned.
        """
        status = DecodeStatus.DEC_ERR

        try:
            if self.sk_frm >= 0:
                logger.info(f"Seeking for the frame {self.sk_frm}")
                seek_ctx = nvc.SeekContext(
                    int(self.sk_frm), self.seek_mode, self.seek_criteria)
                # The seek request is consumed whether or not it succeeds.
                self.sk_frm = -1
                frame_ready = self.nv_dec.DecodeSingleFrame(
                    self.frame_nv12, seek_ctx, self.packet_data
                )
                frame_cnt_inc = seek_ctx.num_frames_decoded
            else:
                frame_ready = self.nv_dec.DecodeSingleFrame(
                    self.frame_nv12, self.packet_data)
                frame_cnt_inc = 1

            # Nvdec is sync in this mode so if frame isn't returned it means
            # EOF or error.
            if not frame_ready:
                return status

            self.num_frames_decoded += 1
            status = DecodeStatus.DEC_READY

            if verbose:
                logger.info(f"Decoded {frame_cnt_inc} frames internally")
                logger.info(
                    f"frame pts (display order)      :{self.packet_data.pts}")
                logger.info(
                    f"frame dts (display order)      :{self.packet_data.dts}")
                logger.info(
                    f"frame pos (display order)      :{self.packet_data.pos}")
                logger.info(
                    f"frame duration (display order) :{self.packet_data.duration}")

        except Exception as e:
            # Logged at error level so the failure is visible even when
            # logging has not been configured for INFO output.
            logger.error(f"{getattr(e, 'message', str(e))}")

        return status

    # Decode single video frame
    def decode_frame(self, verbose=False) -> DecodeStatus:
        if self.mode() == InitMode.STANDALONE:
            return self.decode_frame_standalone(verbose)
        return self.decode_frame_builtin(verbose)

    # Send empty packet to decoder to flush decoded frames queue.
    # Returns the decoder's result: truthy when a frame was flushed into
    # frame_nv12. `verbose` is accepted for signature symmetry and unused.
    def flush_frame(self, verbose=False) -> bool:
        ret = self.nv_dec.FlushSingleFrame(self.frame_nv12)
        if ret:
            self.num_frames_decoded += 1
        return ret

    # Write current video frame to output file.
    def dump_frame(self) -> None:
        self.out_file.write(bytearray(self.frame_nv12))

    def decode(self, frames_to_decode=-1, verbose=False, dump_frames=True) -> None:
        """Decode video frames and optionally write them to the output file.

        Args:
            frames_to_decode: Maximum number of frames to decode. Values
                <= 0 mean "decode until the stream ends or an error occurs".
            verbose: Forwarded to the per-frame decode functions.
            dump_frames: When True every ready frame is written to the
                output file.
        """
        limited = frames_to_decode > 0

        def frames_wanted() -> bool:
            # True while we are still allowed to produce more frames.
            return (not limited) or self.dec_frames() < frames_to_decode

        # total=None tells tqdm that the amount of work is unknown; the
        # context manager makes sure the bar is closed on every exit path.
        with tqdm.tqdm(
            total=frames_to_decode if limited else None,
            ascii=False,
            unit=" frames",
        ) as pbar:
            pbar.set_description("Decoding ")

            # Main decoding cycle
            while frames_wanted():
                status = self.decode_frame(verbose)
                if status == DecodeStatus.DEC_ERR:
                    break
                if status == DecodeStatus.DEC_READY:
                    if dump_frames:
                        self.dump_frame()
                    # Progress is counted in frames, so only advance when
                    # a frame is actually ready.
                    pbar.update()

            # Flush decoded frames queue.
            # This is needed only if decoder is initialized without built-in
            # demuxer. The limit is re-checked for every flushed frame so we
            # never produce more frames than requested.
            if self.mode() == InitMode.STANDALONE:
                while frames_wanted():
                    if not self.flush_frame(verbose):
                        break
                    if dump_frames:
                        self.dump_frame()
                    pbar.update()
if __name__ == "__main__":
    # The text is passed as `description`; passed positionally it would be
    # taken as the program name (`prog`) in usage/help output.
    parser = argparse.ArgumentParser(
        description="This sample decodes input video to raw NV12 file on given GPU."
    )
    parser.add_argument(
        "-g",
        "--gpu-id",
        type=int,
        required=True,
        help="GPU id, check nvidia-smi",
    )
    parser.add_argument(
        "-e",
        "--encoded-file-path",
        type=Path,
        required=True,
        help="Encoded video file (read from)",
    )
    parser.add_argument(
        "-r",
        "--raw-file-path",
        type=Path,
        required=True,
        help="Raw NV12 video file (write to)",
    )
    parser.add_argument("-v", "--verbose", default=False,
                        action="store_true", help="Verbose")
    args = parser.parse_args()

    # Without a configured handler INFO records are dropped, which would
    # make --verbose (and the seek messages) produce no output at all.
    logging.basicConfig(level=logging.INFO)

    dec = NvDecoder(
        args.gpu_id,
        args.encoded_file_path.as_posix(),
        args.raw_file_path.as_posix(),
    )
    try:
        dec.decode(verbose=args.verbose)
    finally:
        # Make sure the decoded data is flushed to disk and the handle is
        # released even if decoding raises.
        dec.out_file.close()

    raise SystemExit(0)