diff --git a/trunk/src/kernel/srs_kernel_mp4.cpp b/trunk/src/kernel/srs_kernel_mp4.cpp index b4d597f7ee..f1223286b6 100644 --- a/trunk/src/kernel/srs_kernel_mp4.cpp +++ b/trunk/src/kernel/srs_kernel_mp4.cpp @@ -5062,21 +5062,15 @@ srs_error_t SrsMp4SampleManager::write_track(SrsFrameType track, if (sample->frame_type == SrsVideoAvcFrameTypeKeyFrame) { stss_entries.push_back(sample->index + 1); } - - if (stts) { - if (previous) { - uint32_t delta = (uint32_t)(sample->dts - previous->dts); - if (stts_entry.sample_delta == 0 || stts_entry.sample_delta == delta) { - stts_entry.sample_delta = delta; - stts_entry.sample_count++; - } else { - stts_entries.push_back(stts_entry); - stts_entry.sample_count = 1; - stts_entry.sample_delta = delta; - } - } else { - // The first sample always in the STTS table. - stts_entry.sample_count++; + + if (stts && previous) { + if (sample->dts >= previous->dts && previous->nb_samples > 0) { + uint32_t delta = (uint32_t)(sample->dts - previous->dts) / previous->nb_samples; + stts_entry.sample_count = previous->nb_samples; + // Calculate delta in the time-scale of the media. + // moov->mvhd->timescale is hardcoded to 1000, and sample->tbn is also hardcoded to 1000. + stts_entry.sample_delta = delta * previous->tbn / 1000; + stts_entries.push_back(stts_entry); } } @@ -5097,7 +5091,10 @@ srs_error_t SrsMp4SampleManager::write_track(SrsFrameType track, previous = sample; } - if (stts && stts_entry.sample_count) { + if (stts && previous && previous->nb_samples > 0) { + stts_entry.sample_count = previous->nb_samples; + // Can't calculate last sample duration, so set sample_delta to 1. 
+ stts_entry.sample_delta = 1; stts_entries.push_back(stts_entry); } @@ -5828,10 +5825,12 @@ srs_error_t SrsMp4Encoder::write_sample( ps->type = SrsFrameTypeVideo; ps->frame_type = (SrsVideoAvcFrameType)ft; ps->index = nb_videos++; + ps->nb_samples = format->video->nb_samples; vduration = dts; } else if (ht == SrsMp4HandlerTypeSOUN) { ps->type = SrsFrameTypeAudio; ps->index = nb_audios++; + ps->nb_samples = format->audio->nb_samples; aduration = dts; } else { srs_freep(ps); diff --git a/trunk/src/kernel/srs_kernel_mp4.hpp b/trunk/src/kernel/srs_kernel_mp4.hpp index 23805773e6..4718107ee2 100644 --- a/trunk/src/kernel/srs_kernel_mp4.hpp +++ b/trunk/src/kernel/srs_kernel_mp4.hpp @@ -1554,7 +1554,7 @@ class SrsMp4SampleDescriptionBox : public SrsMp4FullBox }; // 8.6.1.2 Decoding Time to Sample Box (stts), for Audio/Video. -// ISO_IEC_14496-12-base-format-2012.pdf, page 48 +// ISO_IEC_14496-12-base-format-2012.pdf, page 36 class SrsMp4SttsEntry { public: @@ -1893,6 +1893,9 @@ class SrsMp4Sample // The sample data. uint32_t nb_data; uint8_t* data; + // Number of NALUs or audio frames in this sample. + uint32_t nb_samples; + public: SrsMp4Sample(); virtual ~SrsMp4Sample();