/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "ADTSDemuxer.h"
#include "TimeUnits.h"
#include "VideoUtils.h"
#include "mozilla/Logging.h"
#include "mozilla/UniquePtr.h"
#include "Adts.h"
#include <inttypes.h>
extern mozilla::LazyLogModule gMediaDemuxerLog;
// Logging helpers that write to gMediaDemuxerLog. Each macro body must sit on
// continuation lines of its #define, otherwise the trailing `##__VA_ARGS__)`
// ends up outside the macro definition.
#define LOG(msg, ...) \
  MOZ_LOG(gMediaDemuxerLog, LogLevel::Debug, msg, ##__VA_ARGS__)
#define ADTSLOG(msg, ...) \
  DDMOZ_LOG(gMediaDemuxerLog, LogLevel::Debug, msg, ##__VA_ARGS__)
#define ADTSLOGV(msg, ...) \
  DDMOZ_LOG(gMediaDemuxerLog, LogLevel::Verbose, msg, ##__VA_ARGS__)
namespace mozilla {
using media::TimeUnit;
// ADTSDemuxer
// Stores the media resource and links it as the "source" child of this object.
ADTSDemuxer::ADTSDemuxer(MediaResource* aSource) : mSource(aSource) {
  DDLINKCHILD("source", aSource);
}
// Creates the track demuxer the first time it is needed, then returns the
// result of initialising it.
bool ADTSDemuxer::InitInternal() {
  if (mTrackDemuxer) {
    // Already created by an earlier call; just (re)initialise it.
    return mTrackDemuxer->Init();
  }

  mTrackDemuxer = new ADTSTrackDemuxer(mSource);
  DDLINKCHILD("track demuxer", mTrackDemuxer.get());
  return mTrackDemuxer->Init();
}
// Returns a promise resolved with NS_OK when InitInternal() succeeds, and
// rejected with NS_ERROR_DOM_MEDIA_METADATA_ERR otherwise.
RefPtr<ADTSDemuxer::InitPromise> ADTSDemuxer::Init() {
  if (InitInternal()) {
    ADTSLOG("Init() successful");
    return InitPromise::CreateAndResolve(NS_OK, __func__);
  }

  ADTSLOG("Init() failure: waiting for data");
  return InitPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_METADATA_ERR,
                                      __func__);
}
// Reports one track for TrackInfo::kAudioTrack and zero for every other type.
uint32_t ADTSDemuxer::GetNumberTracks(TrackInfo::TrackType aType) const {
  if (aType == TrackInfo::kAudioTrack) {
    return 1;
  }
  return 0;
}
// Hands out a new reference to the track demuxer, or nullptr when it has not
// been created yet. Neither aType nor aTrackNumber is consulted.
already_AddRefed<MediaTrackDemuxer> ADTSDemuxer::GetTrackDemuxer(
    TrackInfo::TrackType aType, uint32_t aTrackNumber) {
  if (mTrackDemuxer) {
    RefPtr<ADTSTrackDemuxer> track(mTrackDemuxer);
    return track.forget();
  }
  return nullptr;
}
// The resource is considered seekable when its reported length is not
// negative.
bool ADTSDemuxer::IsSeekable() const {
  const int64_t resourceLength = mSource->GetLength();
  return resourceLength >= 0;
}
// ADTSTrackDemuxer
// Wraps aSource, allocates the frame parser (released in the destructor) and
// zeroes all parsing statistics before resetting to the start of the stream.
ADTSTrackDemuxer::ADTSTrackDemuxer(MediaResource* aSource)
    : mSource(aSource),
      mParser(new ADTS::FrameParser()),
      mOffset(0),
      mNumParsedFrames(0),
      mFrameIndex(0),
      mTotalFrameLen(0),
      mSamplesPerFrame(0),
      mSamplesPerSecond(0),
      mChannels(0) {
  DDLINKCHILD("source", aSource);
  // Reset() resets the parser and seeks back to time zero.
  Reset();
}
// Frees the frame parser that the constructor allocated with `new`.
ADTSTrackDemuxer::~ADTSTrackDemuxer() {
  delete mParser;
}
bool ADTSTrackDemuxer::Init() {
FastSeek(TimeUnit::Zero());
// Read the first frame to fetch sample rate and other meta data.
RefPtr<MediaRawData> frame(GetNextFrame(FindNextFrame(
true)));
ADTSLOG(
"Init StreamLength()=%" PRId64
" first-frame-found=%d",
StreamLength(), !!frame);
if (!frame) {
return false;
}
// Rewind back to the stream begin to avoid dropping the first frame.
FastSeek(TimeUnit::Zero());
if (!mSamplesPerSecond) {
return false;
}
if (!mInfo) {
mInfo = MakeUnique<AudioInfo>();
}
mInfo->mRate = mSamplesPerSecond;
mInfo->mChannels = mChannels;
mInfo->mBitDepth = 16;
mInfo->mDuration = Duration();
// AAC Specific information
mInfo->mMimeType =
"audio/mp4a-latm";
// Configure AAC codec-specific values.
// For AAC, mProfile and mExtendedProfile contain the audioObjectType from
// Table 1.3 -- Audio Profile definition, ISO/IEC 14496-3. Eg. 2 == AAC LC
mInfo->mProfile = mInfo->mExtendedProfile =
mParser->FirstFrame().Header().mObjectType;
AudioCodecSpecificBinaryBlob blob;
InitAudioSpecificConfig(mParser->FirstFrame(), blob.mBinaryBlob);
mInfo->mCodecSpecificConfig = AudioCodecSpecificVariant{std::move(blob)};
ADTSLOG(
"Init mInfo={mRate=%u mChannels=%u mBitDepth=%u mDuration=%" PRId64
"}",
mInfo->mRate, mInfo->mChannels, mInfo->mBitDepth,
mInfo->mDuration.ToMicroseconds());
// AAC encoder delay can be 2112 (typical value when using Apple AAC encoder),
// or 1024 (typical value when encoding using fdk_aac, often via ffmpeg).
// See
// https://developer.apple.com/library/content/documentation/QuickTime/QTFF/QTFFAppenG/QTFFAppenG.html
// In an attempt to not trim valid audio data, and because ADTS doesn't
// provide a way to know this pre-roll value, this offets by 1024 frames.
mPreRoll = TimeUnit(1024, mSamplesPerSecond);
return mChannels;
}
// Returns a clone of the track info.
// NOTE(review): mInfo is dereferenced unconditionally; within this file it is
// only created in Init(), so this presumably must not be called before a
// successful Init() -- confirm against callers.
UniquePtr<TrackInfo> ADTSTrackDemuxer::GetInfo() const {
  return mInfo->Clone();
}
// Seeks to aTime minus the pre-roll (clamped at zero): first an estimated jump
// with FastSeek(), then a frame-by-frame scan with ScanUntil(). The promise is
// resolved with the time ScanUntil() reports.
RefPtr<ADTSTrackDemuxer::SeekPromise> ADTSTrackDemuxer::Seek(
    const TimeUnit& aTime) {
  // Back off by the pre-roll unless that would go before the stream start.
  TimeUnit target = TimeUnit::Zero();
  if (aTime > mPreRoll) {
    target = aTime - mPreRoll;
  }

  // Efficiently jump close to the target position.
  FastSeek(target);

  // Correct the seek position by scanning the following frames.
  const TimeUnit reached = ScanUntil(target);
  return SeekPromise::CreateAndResolve(reached, __func__);
}
// Repositions mOffset/mFrameIndex for aTime without reading any data, using
// the average frame length measured so far to estimate the byte offset.
//
// - aTime == 0: jump straight to the first frame's offset.
// - otherwise, if an average frame length is known: estimate the offset from
//   the frame index that corresponds to aTime.
// - otherwise mOffset is left unchanged.
//
// Returns the duration that corresponds to the resulting frame index.
TimeUnit ADTSTrackDemuxer::FastSeek(const TimeUnit& aTime) {
  ADTSLOG("FastSeek(%" PRId64 ") avgFrameLen=%f mNumParsedFrames=%" PRIu64
          " mFrameIndex=%" PRId64 " mOffset=%" PRIu64,
          aTime.ToMicroseconds(), AverageFrameLength(), mNumParsedFrames,
          mFrameIndex, mOffset);

  const uint64_t firstFrameOffset = mParser->FirstFrame().Offset();
  if (!aTime.ToMicroseconds()) {
    // Quick seek to the beginning of the stream.
    mOffset = firstFrameOffset;
  } else if (AverageFrameLength() > 0) {
    mOffset = firstFrameOffset +
              AssertedCast<uint64_t>(
                  AssertedCast<double>(FrameIndexFromTime(aTime)) *
                  AverageFrameLength());
  }

  // Keep the estimate inside the stream when its length is known.
  const int64_t streamLength = StreamLength();
  if (mOffset > firstFrameOffset && streamLength > 0) {
    mOffset = std::min(static_cast<uint64_t>(streamLength - 1), mOffset);
  }

  mFrameIndex = FrameIndexFromOffset(mOffset);
  mParser->EndFrameSession();

  // streamLength is signed (negative values are handled as "unknown" by
  // Duration()), so it is logged with PRId64 rather than PRIu64.
  ADTSLOG("FastSeek End avgFrameLen=%f mNumParsedFrames=%" PRIu64
          " mFrameIndex=%" PRId64 " mFirstFrameOffset=%" PRIu64
          " mOffset=%" PRIu64 " SL=%" PRId64,
          AverageFrameLength(), mNumParsedFrames, mFrameIndex, firstFrameOffset,
          mOffset, streamLength);

  return Duration(mFrameIndex);
}
// Skips frames one at a time until the end of the next frame would reach
// aTime, or until no further frame can be skipped. A zero aTime is delegated
// to FastSeek(). Returns the duration corresponding to the final frame index.
TimeUnit ADTSTrackDemuxer::ScanUntil(const TimeUnit& aTime) {
  ADTSLOG("ScanUntil(%" PRId64 ") avgFrameLen=%f mNumParsedFrames=%" PRIu64
          " mFrameIndex=%" PRId64 " mOffset=%" PRIu64,
          aTime.ToMicroseconds(), AverageFrameLength(), mNumParsedFrames,
          mFrameIndex, mOffset);

  if (aTime.ToMicroseconds() == 0) {
    return FastSeek(aTime);
  }

  // Already past the target: jump back before scanning forward.
  if (Duration(mFrameIndex) > aTime) {
    FastSeek(aTime);
  }

  for (;;) {
    if (!SkipNextFrame(FindNextFrame())) {
      break;
    }
    if (!(Duration(mFrameIndex + 1) < aTime)) {
      break;
    }
    ADTSLOGV("ScanUntil* avgFrameLen=%f mNumParsedFrames=%" PRIu64
             " mFrameIndex=%" PRId64 " mOffset=%" PRIu64 " Duration=%" PRId64,
             AverageFrameLength(), mNumParsedFrames, mFrameIndex, mOffset,
             Duration(mFrameIndex + 1).ToMicroseconds());
  }

  ADTSLOG("ScanUntil End avgFrameLen=%f mNumParsedFrames=%" PRIu64
          " mFrameIndex=%" PRId64 " mOffset=%" PRIu64,
          AverageFrameLength(), mNumParsedFrames, mFrameIndex, mOffset);

  return Duration(mFrameIndex);
}
// Demuxes up to aNumSamples frames, stopping early at the first frame that
// cannot be produced. The promise is resolved with the collected samples, or
// rejected with NS_ERROR_DOM_MEDIA_END_OF_STREAM when none were collected.
RefPtr<ADTSTrackDemuxer::SamplesPromise> ADTSTrackDemuxer::GetSamples(
    int32_t aNumSamples) {
  ADTSLOGV("GetSamples(%d) Begin mOffset=%" PRIu64 " mNumParsedFrames=%" PRIu64
           " mFrameIndex=%" PRId64 " mTotalFrameLen=%" PRIu64
           " mSamplesPerFrame=%d "
           "mSamplesPerSecond=%d mChannels=%d",
           aNumSamples, mOffset, mNumParsedFrames, mFrameIndex, mTotalFrameLen,
           mSamplesPerFrame, mSamplesPerSecond, mChannels);

  MOZ_ASSERT(aNumSamples);

  RefPtr<SamplesHolder> holder = new SamplesHolder();

  // aNumSamples is counted down in place; its final value is logged below.
  while (aNumSamples--) {
    RefPtr<MediaRawData> sample(GetNextFrame(FindNextFrame()));
    if (sample) {
      holder->AppendSample(sample);
      continue;
    }
    break;
  }

  ADTSLOGV(
      "GetSamples() End mSamples.Size()=%zu aNumSamples=%d mOffset=%" PRIu64
      " mNumParsedFrames=%" PRIu64 " mFrameIndex=%" PRId64
      " mTotalFrameLen=%" PRIu64
      " mSamplesPerFrame=%d mSamplesPerSecond=%d "
      "mChannels=%d",
      holder->GetSamples().Length(), aNumSamples, mOffset, mNumParsedFrames,
      mFrameIndex, mTotalFrameLen, mSamplesPerFrame, mSamplesPerSecond,
      mChannels);

  if (!holder->GetSamples().IsEmpty()) {
    return SamplesPromise::CreateAndResolve(holder, __func__);
  }
  return SamplesPromise::CreateAndReject(NS_ERROR_DOM_MEDIA_END_OF_STREAM,
                                         __func__);
}
// Resets the parser (when present) and seeks back to time zero.
void ADTSTrackDemuxer::Reset() {
  ADTSLOG("Reset()");

  MOZ_ASSERT(mParser);
  if (mParser) {
    mParser->Reset();
  }

  FastSeek(TimeUnit::Zero());
}
// Always rejects with NS_ERROR_DOM_MEDIA_DEMUXER_ERR and a skipped count of
// zero; aTimeThreshold is ignored.
RefPtr<ADTSTrackDemuxer::SkipAccessPointPromise>
ADTSTrackDemuxer::SkipToNextRandomAccessPoint(const TimeUnit& aTimeThreshold) {
  // Will not be called for audio-only resources.
  return SkipAccessPointPromise::CreateAndReject(
      SkipFailureHolder(NS_ERROR_DOM_MEDIA_DEMUXER_ERR, 0), __func__);
}
// Returns the current byte offset (mOffset) converted to a signed value.
int64_t ADTSTrackDemuxer::GetResourceOffset() const {
  return AssertedCast<int64_t>(mOffset);
}
// Returns the estimated buffered ranges of the resource, scaled by the
// estimated total duration. An infinite duration yields empty intervals.
media::TimeIntervals ADTSTrackDemuxer::GetBuffered() {
  auto total = Duration();
  if (!total.IsInfinite()) {
    AutoPinned<MediaResource> pinned(mSource.GetResource());
    return GetEstimatedBufferedTimeRanges(pinned, total.ToMicroseconds());
  }
  return media::TimeIntervals();
}
// Returns the length reported by the underlying source.
int64_t ADTSTrackDemuxer::StreamLength() const {
  return mSource.GetLength();
}
// Estimates the total stream duration from the stream length and the average
// frame length. Returns Invalid before any frame has been parsed and Infinity
// when the stream length is negative (unknown).
TimeUnit ADTSTrackDemuxer::Duration() const {
  if (mNumParsedFrames == 0) {
    return TimeUnit::Invalid();
  }

  const int64_t totalBytes = StreamLength();
  if (totalBytes < 0) {
    // Unknown length, we can't estimate duration, this is probably a live
    // stream.
    return TimeUnit::FromInfinity();
  }

  const int64_t audioStart =
      AssertedCast<int64_t>(mParser->FirstFrame().Offset());
  const double audioBytes = AssertedCast<double>(totalBytes - audioStart);
  const int64_t estimatedFrames =
      AssertedCast<int64_t>(audioBytes / AverageFrameLength());
  return Duration(estimatedFrames);
}
// Converts a frame count into a duration: aNumFrames * mSamplesPerFrame ticks
// at a base of mSamplesPerSecond. Returns Invalid while the sample rate is
// still zero.
TimeUnit ADTSTrackDemuxer::Duration(int64_t aNumFrames) const {
  if (mSamplesPerSecond) {
    return TimeUnit(aNumFrames * mSamplesPerFrame, mSamplesPerSecond);
  }
  return TimeUnit::Invalid();
}
// Scans forward from mOffset for the next ADTS frame and returns the parser's
// current frame.
//
// When findFirstFrame is true, a candidate is only accepted if a sync marker
// is also found right after it; otherwise scanning resumes one byte after the
// candidate's offset.
//
// On failure (end of stream, more than MAX_SKIPPED_BYTES skipped, no way to
// advance, or a zero-length frame) the parser is reset and its current frame
// is returned.
const ADTS::Frame& ADTSTrackDemuxer::FindNextFrame(
    bool findFirstFrame /*= false*/) {
  static const int BUFFER_SIZE = 4096;
  static const int MAX_SKIPPED_BYTES = 10 * BUFFER_SIZE;
  // Minimum header size is 7 bytes.
  static const uint32_t MIN_HEADER_SIZE = 7;

  ADTSLOGV("FindNext() Begin mOffset=%" PRIu64 " mNumParsedFrames=%" PRIu64
           " mFrameIndex=%" PRId64 " mTotalFrameLen=%" PRIu64
           " mSamplesPerFrame=%d mSamplesPerSecond=%d mChannels=%d",
           mOffset, mNumParsedFrames, mFrameIndex, mTotalFrameLen,
           mSamplesPerFrame, mSamplesPerSecond, mChannels);

  uint8_t buffer[BUFFER_SIZE];
  uint32_t read = 0;

  bool foundFrame = false;
  uint64_t frameHeaderOffset = mOffset;

  // Prepare the parser for the next frame parsing session.
  mParser->EndFrameSession();

  // Check whether we've found a valid ADTS frame.
  while (!foundFrame) {
    if ((read = Read(buffer, AssertedCast<int64_t>(frameHeaderOffset),
                     BUFFER_SIZE)) == 0) {
      ADTSLOG("FindNext() EOS without a frame");
      break;
    }

    if (frameHeaderOffset - mOffset > MAX_SKIPPED_BYTES) {
      ADTSLOG("FindNext() exceeded MAX_SKIPPED_BYTES without a frame");
      break;
    }

    const ADTS::Frame& currentFrame = mParser->CurrentFrame();
    foundFrame = mParser->Parse(frameHeaderOffset, buffer, buffer + read);

    if (findFirstFrame && foundFrame) {
      // Check for sync marker after the found frame, since it's
      // possible to find sync marker in AAC data. If sync marker
      // exists after the current frame then we've found a frame
      // header.
      uint64_t nextFrameHeaderOffset =
          currentFrame.Offset() + currentFrame.Length();
      // Named distinctly so it does not shadow the outer `read`.
      const uint32_t syncRead =
          Read(buffer, AssertedCast<int64_t>(nextFrameHeaderOffset), 2);
      if (syncRead != 2 || !ADTS::FrameHeader::MatchesSync(buffer)) {
        // The offset must be taken before the parser is reset.
        frameHeaderOffset = currentFrame.Offset() + 1;
        mParser->Reset();
        foundFrame = false;
        continue;
      }
    }

    if (foundFrame) {
      break;
    }

    // The window advances by everything read except the last
    // MIN_HEADER_SIZE bytes (presumably so that a header straddling the end
    // of this window is seen again in the next one). With MIN_HEADER_SIZE
    // bytes or fewer there is nothing to advance by; bail out instead of
    // letting the unsigned subtraction wrap around to a huge value.
    if (read <= MIN_HEADER_SIZE) {
      break;
    }
    const uint64_t advance = read - MIN_HEADER_SIZE;

    // Check for offset overflow.
    if (frameHeaderOffset + advance <= frameHeaderOffset) {
      break;
    }

    frameHeaderOffset += advance;
  }

  if (!foundFrame || !mParser->CurrentFrame().Length()) {
    ADTSLOG(
        "FindNext() Exit foundFrame=%d mParser->CurrentFrame().Length()=%zu ",
        foundFrame, mParser->CurrentFrame().Length());
    mParser->Reset();
    return mParser->CurrentFrame();
  }

  ADTSLOGV("FindNext() End mOffset=%" PRIu64 " mNumParsedFrames=%" PRIu64
           " mFrameIndex=%" PRId64 " frameHeaderOffset=%" PRIu64
           " mTotalFrameLen=%" PRIu64
           " mSamplesPerFrame=%d mSamplesPerSecond=%d"
           " mChannels=%d",
           mOffset, mNumParsedFrames, mFrameIndex, frameHeaderOffset,
           mTotalFrameLen, mSamplesPerFrame, mSamplesPerSecond, mChannels);

  return mParser->CurrentFrame();
}
// Advances past aFrame. When at least one frame has already been parsed and
// aFrame has a non-zero length, only the bookkeeping is updated; otherwise the
// frame goes through GetNextFrame() and the result says whether that yielded a
// sample.
bool ADTSTrackDemuxer::SkipNextFrame(const ADTS::Frame& aFrame) {
  const bool canSkipWithoutReading = mNumParsedFrames && aFrame.Length();
  if (!canSkipWithoutReading) {
    RefPtr<MediaRawData> sample(GetNextFrame(aFrame));
    return !!sample;
  }

  UpdateState(aFrame);

  ADTSLOGV("SkipNext() End mOffset=%" PRIu64 " mNumParsedFrames=%" PRIu64
           " mFrameIndex=%" PRId64 " mTotalFrameLen=%" PRIu64
           " mSamplesPerFrame=%d mSamplesPerSecond=%d mChannels=%d",
           mOffset, mNumParsedFrames, mFrameIndex, mTotalFrameLen,
           mSamplesPerFrame, mSamplesPerSecond, mChannels);

  return true;
}
// Reads the payload of aFrame into a new MediaRawData, updates the demuxer
// state and stamps the sample with time, duration and keyframe flag. Returns
// nullptr when aFrame is invalid, the buffer cannot be sized, or fewer bytes
// than the payload length were read.
already_AddRefed<MediaRawData> ADTSTrackDemuxer::GetNextFrame(
    const ADTS::Frame& aFrame) {
  ADTSLOG("GetNext() Begin({mOffset=%" PRIu64 " HeaderSize()=%" PRIu64
          " Length()=%zu})",
          aFrame.Offset(), aFrame.Header().HeaderSize(),
          aFrame.PayloadLength());

  if (!aFrame.IsValid()) {
    return nullptr;
  }

  const int64_t payloadOffset = AssertedCast<int64_t>(aFrame.PayloadOffset());
  const uint32_t payloadLength = aFrame.PayloadLength();

  RefPtr<MediaRawData> sample = new MediaRawData();
  sample->mOffset = payloadOffset;

  UniquePtr<MediaRawDataWriter> writer(sample->CreateWriter());
  if (!writer->SetSize(payloadLength)) {
    ADTSLOG("GetNext() Exit failed to allocated media buffer");
    return nullptr;
  }

  const uint32_t bytesRead = Read(writer->Data(), payloadOffset,
                                  AssertedCast<int32_t>(payloadLength));
  if (bytesRead != payloadLength) {
    ADTSLOG("GetNext() Exit read=%u frame->Size()=%zu", bytesRead,
            sample->Size());
    return nullptr;
  }

  // Advances mFrameIndex, so the frame just read is index mFrameIndex - 1.
  UpdateState(aFrame);

  // Untrimmed presentation window of this packet, shifted by the pre-roll.
  TimeUnit rawDuration = Duration(1);
  TimeUnit rawpts = Duration(mFrameIndex - 1) - mPreRoll;
  TimeUnit rawend = rawpts + rawDuration;

  sample->mTime = std::max(TimeUnit::Zero(), rawpts);
  sample->mDuration = rawDuration;
  sample->mTimecode = sample->mTime;
  sample->mKeyframe = true;

  // Handle decoder delay. A packet must be trimmed if its pts, adjusted for
  // decoder delay, is negative. A packet can be trimmed entirely.
  if (rawpts.IsNegative()) {
    sample->mDuration = std::max(TimeUnit::Zero(), rawend - sample->mTime);
  }

  // ADTS frames can have a presentation duration of zero, e.g. when a frame is
  // part of preroll.
  MOZ_ASSERT(sample->mDuration.IsPositiveOrZero());

  ADTSLOG("ADTS packet demuxed: pts [%lf, %lf] (duration: %lf)",
          sample->mTime.ToSeconds(), sample->GetEndTime().ToSeconds(),
          sample->mDuration.ToSeconds());

  // Indicate original packet information to trim after decoding.
  if (sample->mDuration != rawDuration) {
    sample->mOriginalPresentationWindow =
        Some(media::TimeInterval{rawpts, rawend});
    ADTSLOG("Total packet time excluding trimming: [%lf, %lf]",
            rawpts.ToSeconds(), rawend.ToSeconds());
  }

  ADTSLOGV("GetNext() End mOffset=%" PRIu64 " mNumParsedFrames=%" PRIu64
           " mFrameIndex=%" PRId64 " mTotalFrameLen=%" PRIu64
           " mSamplesPerFrame=%d mSamplesPerSecond=%d mChannels=%d",
           mOffset, mNumParsedFrames, mFrameIndex, mTotalFrameLen,
           mSamplesPerFrame, mSamplesPerSecond, mChannels);

  return sample.forget();
}
int64_t ADTSTrackDemuxer::FrameIndexFromOffset(uint64_t aOffset)
const {
int64_t frameIndex = 0;
if (AverageFrameLength() > 0) {
frameIndex = AssertedCast<int64_t>(
AssertedCast<
double>(aOffset - mParser->FirstFrame().Offset()) /
AverageFrameLength());
MOZ_ASSERT(frameIndex >= 0);
}
ADTSLOGV(
"FrameIndexFromOffset(%" PRId64
") -> %" PRId64, aOffset,
frameIndex);
return frameIndex;
}
// Converts aTime into a frame index: seconds * samples-per-second /
// samples-per-frame, minus one, clamped so the result is never negative.
// Returns 0 while the sample rate or samples-per-frame are not yet known.
int64_t ADTSTrackDemuxer::FrameIndexFromTime(const TimeUnit& aTime) const {
  int64_t frameIndex = 0;
  if (mSamplesPerSecond > 0 && mSamplesPerFrame > 0) {
    frameIndex = AssertedCast<int64_t>(aTime.ToSeconds() * mSamplesPerSecond /
                                       mSamplesPerFrame) -
                 1;
  }

  // The message names this function; it previously said
  // "FrameIndexFromOffset", which made the two helpers' log lines
  // indistinguishable.
  ADTSLOGV("FrameIndexFromTime(%fs) -> %" PRId64, aTime.ToSeconds(),
           frameIndex);
  return std::max<int64_t>(0, frameIndex);
}
// Records that aFrame has been consumed: moves mOffset to the end of the
// frame, accumulates the length statistics, captures the stream parameters
// from the first header seen, and bumps the frame counters.
void ADTSTrackDemuxer::UpdateState(const ADTS::Frame& aFrame) {
  const uint32_t frameBytes = aFrame.Length();

  // Prevent overflow of the running total.
  const bool totalWouldWrap = mTotalFrameLen + frameBytes < mTotalFrameLen;
  if (totalWouldWrap) {
    // These variables have a linear dependency and are only used to derive the
    // average frame length.
    mTotalFrameLen /= 2;
    mNumParsedFrames /= 2;
  }
  mTotalFrameLen += frameBytes;

  // Full frame parsed, move offset to its end.
  mOffset = aFrame.Offset() + frameBytes;

  // Stream parameters are taken once, while mSamplesPerFrame is still zero.
  if (!mSamplesPerFrame) {
    const ADTS::FrameHeader& hdr = aFrame.Header();
    mSamplesPerFrame = hdr.mSamples;
    mSamplesPerSecond = hdr.mSampleRate;
    mChannels = hdr.mChannels;
  }

  ++mNumParsedFrames;
  ++mFrameIndex;
  MOZ_ASSERT(mFrameIndex > 0);
}
// Reads up to aSize bytes at aOffset from the source into aBuffer and returns
// the number of bytes actually read (0 when ReadAt fails).
//
// Once mInfo exists and the stream length is known, the request is clamped to
// the bytes remaining after aOffset.
uint32_t ADTSTrackDemuxer::Read(uint8_t* aBuffer, int64_t aOffset,
                                int32_t aSize) {
  ADTSLOGV("ADTSTrackDemuxer::Read(%p %" PRId64 " %d)", aBuffer, aOffset,
           aSize);

  const int64_t streamLen = StreamLength();
  if (mInfo && streamLen > 0) {
    const int64_t max = streamLen > aOffset ? streamLen - aOffset : 0;
    // Prevent blocking reads after successful initialization.
    // Clamp in 64 bits first: `max` can exceed INT32_MAX for large streams,
    // so narrowing it before taking the minimum would be out of range. The
    // minimum itself never exceeds aSize and therefore always fits.
    aSize = static_cast<int32_t>(std::min<int64_t>(aSize, max));
  }

  uint32_t read = 0;
  ADTSLOGV("ADTSTrackDemuxer::Read -> ReadAt(%d)", aSize);
  const nsresult rv = mSource.ReadAt(aOffset, reinterpret_cast<char*>(aBuffer),
                                     static_cast<uint32_t>(aSize), &read);
  NS_ENSURE_SUCCESS(rv, 0);
  return read;
}
// Mean frame length in bytes over the frames counted so far, or 0.0 when no
// frame has been counted.
double ADTSTrackDemuxer::AverageFrameLength() const {
  if (!mNumParsedFrames) {
    return 0.0;
  }
  const double totalBytes = AssertedCast<double>(mTotalFrameLen);
  const double frameCount = AssertedCast<double>(mNumParsedFrames);
  return totalBytes / frameCount;
}
/* static */
bool ADTSDemuxer::ADTSSniffer(
const uint8_t* aData,
const uint32_t aLength) {
if (aLength < 7) {
return false;
}
if (!ADTS::FrameHeader::MatchesSync(Span(aData, aLength))) {
return false;
}
auto parser = MakeUnique<ADTS::FrameParser>();
if (!parser->Parse(0, aData, aData + aLength)) {
return false;
}
const ADTS::Frame& currentFrame = parser->CurrentFrame();
// Check for sync marker after the found frame, since it's
// possible to find sync marker in AAC data. If sync marker
// exists after the current frame then we've found a frame
// header.
uint64_t nextFrameHeaderOffset =
currentFrame.Offset() + currentFrame.Length();
return aLength > nextFrameHeaderOffset &&
aLength - nextFrameHeaderOffset >= 2 &&
ADTS::FrameHeader::MatchesSync(Span(aData + nextFrameHeaderOffset,
aLength - nextFrameHeaderOffset));
}
}
// namespace mozilla