1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //#define LOG_NDEBUG 0
18 #define LOG_TAG "MPEG4Extractor"
19
20 #include <ctype.h>
21 #include <inttypes.h>
22 #include <algorithm>
23 #include <map>
24 #include <memory>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include <log/log.h>
30 #include <utils/Log.h>
31
32 #include "AC4Parser.h"
33 #include "MPEG4Extractor.h"
34 #include "SampleTable.h"
35 #include "ItemTable.h"
36
37 #include <ESDS.h>
38 #include <ID3.h>
39 #include <media/stagefright/DataSourceBase.h>
40 #include <media/ExtractorUtils.h>
41 #include <media/stagefright/foundation/ABitReader.h>
42 #include <media/stagefright/foundation/ABuffer.h>
43 #include <media/stagefright/foundation/ADebug.h>
44 #include <media/stagefright/foundation/AMessage.h>
45 #include <media/stagefright/foundation/AudioPresentationInfo.h>
46 #include <media/stagefright/foundation/AUtils.h>
47 #include <media/stagefright/foundation/ByteUtils.h>
48 #include <media/stagefright/foundation/ColorUtils.h>
49 #include <media/stagefright/foundation/avc_utils.h>
50 #include <media/stagefright/foundation/hexdump.h>
51 #include <media/stagefright/foundation/OpusHeader.h>
52 #include <media/stagefright/MediaBufferGroup.h>
53 #include <media/stagefright/MediaDefs.h>
54 #include <media/stagefright/MetaDataBase.h>
55 #include <utils/String8.h>
56
57 #include <byteswap.h>
58
59 #ifndef UINT32_MAX
60 #define UINT32_MAX (4294967295U)
61 #endif
62
63 #define ALAC_SPECIFIC_INFO_SIZE (36)
64
65 // TODO : Remove the defines once mainline media is built against NDK >= 31.
66 // The mp4 extractor is part of mainline and builds against NDK 29 as of
67 // writing. These keys are available only from NDK 31:
68 #define AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION \
69 "mpegh-profile-level-indication"
70 #define AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT \
71 "mpegh-reference-channel-layout"
72 #define AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS \
73 "mpegh-compatible-sets"
74
75 namespace android {
76
// Size limits used while parsing.
enum {
    // Upper bound, in bytes, on the track header chunk handed back to callers.
    kMaxTrackHeaderSize = 32,

    // Largest atom size accepted here. The spec allows some atoms to be
    // bigger, but they are only allowed up to this size.
    kMaxAtomSize = 64 << 20,  // 64 MiB
};
85
86 class MPEG4Source : public MediaTrackHelper {
87 static const size_t kMaxPcmFrameSize = 8192;
88 public:
89 // Caller retains ownership of both "dataSource" and "sampleTable".
90 MPEG4Source(AMediaFormat *format,
91 DataSourceHelper *dataSource,
92 int32_t timeScale,
93 const sp<SampleTable> &sampleTable,
94 Vector<SidxEntry> &sidx,
95 const Trex *trex,
96 off64_t firstMoofOffset,
97 const sp<ItemTable> &itemTable,
98 uint64_t elstShiftStartTicks,
99 uint64_t elstInitialEmptyEditTicks);
100 virtual status_t init();
101
102 virtual media_status_t start();
103 virtual media_status_t stop();
104
105 virtual media_status_t getFormat(AMediaFormat *);
106
107 virtual media_status_t read(MediaBufferHelper **buffer, const ReadOptions *options = NULL);
supportsNonBlockingRead()108 bool supportsNonBlockingRead() override { return true; }
109 virtual media_status_t fragmentedRead(
110 MediaBufferHelper **buffer, const ReadOptions *options = NULL);
111
112 virtual ~MPEG4Source();
113
114 private:
115 Mutex mLock;
116
117 AMediaFormat *mFormat;
118 DataSourceHelper *mDataSource;
119 int32_t mTimescale;
120 sp<SampleTable> mSampleTable;
121 uint32_t mCurrentSampleIndex;
122 uint32_t mCurrentFragmentIndex;
123 Vector<SidxEntry> &mSegments;
124 const Trex *mTrex;
125 off64_t mFirstMoofOffset;
126 off64_t mCurrentMoofOffset;
127 off64_t mCurrentMoofSize;
128 off64_t mNextMoofOffset;
129 uint32_t mCurrentTime; // in media timescale ticks
130 int32_t mLastParsedTrackId;
131 int32_t mTrackId;
132
133 int32_t mCryptoMode; // passed in from extractor
134 int32_t mDefaultIVSize; // passed in from extractor
135 uint8_t mCryptoKey[16]; // passed in from extractor
136 int32_t mDefaultEncryptedByteBlock;
137 int32_t mDefaultSkipByteBlock;
138 uint32_t mCurrentAuxInfoType;
139 uint32_t mCurrentAuxInfoTypeParameter;
140 int32_t mCurrentDefaultSampleInfoSize;
141 uint32_t mCurrentSampleInfoCount;
142 uint32_t mCurrentSampleInfoAllocSize;
143 uint8_t* mCurrentSampleInfoSizes;
144 uint32_t mCurrentSampleInfoOffsetCount;
145 uint32_t mCurrentSampleInfoOffsetsAllocSize;
146 uint64_t* mCurrentSampleInfoOffsets;
147
148 bool mIsAVC;
149 bool mIsHEVC;
150 bool mIsDolbyVision;
151 bool mIsAC4;
152 bool mIsMpegH = false;
153 bool mIsPcm;
154 size_t mNALLengthSize;
155
156 bool mStarted;
157
158 MediaBufferHelper *mBuffer;
159
160 size_t mSrcBufferSize;
161 uint8_t *mSrcBuffer;
162
163 bool mIsHeif;
164 bool mIsAvif;
165 bool mIsAudio;
166 bool mIsUsac = false;
167 sp<ItemTable> mItemTable;
168
169 /* Shift start offset (move to earlier time) when media_time > 0,
170 * in media time scale.
171 */
172 uint64_t mElstShiftStartTicks;
173 /* Initial start offset (move to later time), empty edit list entry
174 * in media time scale.
175 */
176 uint64_t mElstInitialEmptyEditTicks;
177
178 size_t parseNALSize(const uint8_t *data) const;
179 status_t parseChunk(off64_t *offset);
180 status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
181 status_t parseTrackFragmentRun(off64_t offset, off64_t size);
182 status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
183 status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
184 status_t parseClearEncryptedSizes(off64_t offset, bool isSampleEncryption,
185 uint32_t flags, off64_t size);
186 status_t parseSampleEncryption(off64_t offset, off64_t size);
187 // returns -1 for invalid layer ID
188 int32_t parseHEVCLayerId(const uint8_t *data, size_t size);
189
190 struct TrackFragmentHeaderInfo {
191 enum Flags {
192 kBaseDataOffsetPresent = 0x01,
193 kSampleDescriptionIndexPresent = 0x02,
194 kDefaultSampleDurationPresent = 0x08,
195 kDefaultSampleSizePresent = 0x10,
196 kDefaultSampleFlagsPresent = 0x20,
197 kDurationIsEmpty = 0x10000,
198 };
199
200 uint32_t mTrackID;
201 uint32_t mFlags;
202 uint64_t mBaseDataOffset;
203 uint32_t mSampleDescriptionIndex;
204 uint32_t mDefaultSampleDuration;
205 uint32_t mDefaultSampleSize;
206 uint32_t mDefaultSampleFlags;
207
208 uint64_t mDataOffset;
209 };
210 TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;
211
212 struct Sample {
213 off64_t offset;
214 size_t size;
215 uint32_t duration;
216 int32_t compositionOffset;
217 uint8_t iv[16];
218 Vector<uint32_t> clearsizes;
219 Vector<uint32_t> encryptedsizes;
220 };
221 Vector<Sample> mCurrentSamples;
222 std::map<off64_t, uint32_t> mDrmOffsets;
223
224 MPEG4Source(const MPEG4Source &);
225 MPEG4Source &operator=(const MPEG4Source &);
226 };
227
// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache, while forwarding
// all remaining requests to the wrapped data source.
// It is used to cache the full sample table metadata of a single track.
// Wrappers may be stacked to cover all tracks, i.e. each
// CachedRangedDataSource caches the sample table metadata of one track.
234
235 class CachedRangedDataSource : public DataSourceHelper {
236 public:
237 explicit CachedRangedDataSource(DataSourceHelper *source);
238 virtual ~CachedRangedDataSource();
239
240 ssize_t readAt(off64_t offset, void *data, size_t size) override;
241 status_t getSize(off64_t *size) override;
242 uint32_t flags() override;
243
244 status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess);
245
246
247 private:
248 Mutex mLock;
249
250 DataSourceHelper *mSource;
251 bool mOwnsDataSource;
252 off64_t mCachedOffset;
253 size_t mCachedSize;
254 uint8_t *mCache;
255
256 void clearCache();
257
258 CachedRangedDataSource(const CachedRangedDataSource &);
259 CachedRangedDataSource &operator=(const CachedRangedDataSource &);
260 };
261
CachedRangedDataSource(DataSourceHelper * source)262 CachedRangedDataSource::CachedRangedDataSource(DataSourceHelper *source)
263 : DataSourceHelper(source),
264 mSource(source),
265 mOwnsDataSource(false),
266 mCachedOffset(0),
267 mCachedSize(0),
268 mCache(NULL) {
269 }
270
~CachedRangedDataSource()271 CachedRangedDataSource::~CachedRangedDataSource() {
272 clearCache();
273 if (mOwnsDataSource) {
274 delete mSource;
275 }
276 }
277
clearCache()278 void CachedRangedDataSource::clearCache() {
279 if (mCache) {
280 free(mCache);
281 mCache = NULL;
282 }
283
284 mCachedOffset = 0;
285 mCachedSize = 0;
286 }
287
readAt(off64_t offset,void * data,size_t size)288 ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) {
289 Mutex::Autolock autoLock(mLock);
290
291 if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
292 memcpy(data, &mCache[offset - mCachedOffset], size);
293 return size;
294 }
295
296 return mSource->readAt(offset, data, size);
297 }
298
getSize(off64_t * size)299 status_t CachedRangedDataSource::getSize(off64_t *size) {
300 return mSource->getSize(size);
301 }
302
flags()303 uint32_t CachedRangedDataSource::flags() {
304 return mSource->flags();
305 }
306
setCachedRange(off64_t offset,size_t size,bool assumeSourceOwnershipOnSuccess)307 status_t CachedRangedDataSource::setCachedRange(off64_t offset,
308 size_t size,
309 bool assumeSourceOwnershipOnSuccess) {
310 Mutex::Autolock autoLock(mLock);
311
312 clearCache();
313
314 mCache = (uint8_t *)malloc(size);
315
316 if (mCache == NULL) {
317 return -ENOMEM;
318 }
319
320 mCachedOffset = offset;
321 mCachedSize = size;
322
323 ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
324
325 if (err < (ssize_t)size) {
326 clearCache();
327
328 return ERROR_IO;
329 }
330 mOwnsDataSource = assumeSourceOwnershipOnSuccess;
331 return OK;
332 }
333
334 ////////////////////////////////////////////////////////////////////////////////
335
// Debug switch: when true, parseChunk() prints each chunk and hex-dumps its
// leading bytes.
static constexpr bool kUseHexDump = false;
337
FourCC2MIME(uint32_t fourcc)338 static const char *FourCC2MIME(uint32_t fourcc) {
339 switch (fourcc) {
340 case FOURCC("mp4a"):
341 return MEDIA_MIMETYPE_AUDIO_AAC;
342
343 case FOURCC("samr"):
344 return MEDIA_MIMETYPE_AUDIO_AMR_NB;
345
346 case FOURCC("sawb"):
347 return MEDIA_MIMETYPE_AUDIO_AMR_WB;
348
349 case FOURCC("ec-3"):
350 return MEDIA_MIMETYPE_AUDIO_EAC3;
351
352 case FOURCC("mp4v"):
353 return MEDIA_MIMETYPE_VIDEO_MPEG4;
354
355 case FOURCC("s263"):
356 case FOURCC("h263"):
357 case FOURCC("H263"):
358 return MEDIA_MIMETYPE_VIDEO_H263;
359
360 case FOURCC("avc1"):
361 return MEDIA_MIMETYPE_VIDEO_AVC;
362
363 case FOURCC("hvc1"):
364 case FOURCC("hev1"):
365 return MEDIA_MIMETYPE_VIDEO_HEVC;
366
367 case FOURCC("dvav"):
368 case FOURCC("dva1"):
369 case FOURCC("dvhe"):
370 case FOURCC("dvh1"):
371 case FOURCC("dav1"):
372 return MEDIA_MIMETYPE_VIDEO_DOLBY_VISION;
373
374 case FOURCC("ac-4"):
375 return MEDIA_MIMETYPE_AUDIO_AC4;
376 case FOURCC("Opus"):
377 return MEDIA_MIMETYPE_AUDIO_OPUS;
378
379 case FOURCC("twos"):
380 case FOURCC("sowt"):
381 return MEDIA_MIMETYPE_AUDIO_RAW;
382 case FOURCC("alac"):
383 return MEDIA_MIMETYPE_AUDIO_ALAC;
384 case FOURCC("fLaC"):
385 return MEDIA_MIMETYPE_AUDIO_FLAC;
386 case FOURCC("av01"):
387 return MEDIA_MIMETYPE_VIDEO_AV1;
388 case FOURCC("vp09"):
389 return MEDIA_MIMETYPE_VIDEO_VP9;
390 case FOURCC(".mp3"):
391 case 0x6D730055: // "ms U" mp3 audio
392 return MEDIA_MIMETYPE_AUDIO_MPEG;
393 case FOURCC("mha1"):
394 return MEDIA_MIMETYPE_AUDIO_MPEGH_MHA1;
395 case FOURCC("mhm1"):
396 return MEDIA_MIMETYPE_AUDIO_MPEGH_MHM1;
397 default:
398 ALOGW("Unknown fourcc: %c%c%c%c",
399 (fourcc >> 24) & 0xff,
400 (fourcc >> 16) & 0xff,
401 (fourcc >> 8) & 0xff,
402 fourcc & 0xff
403 );
404 return "application/octet-stream";
405 }
406 }
407
AdjustChannelsAndRate(uint32_t fourcc,uint32_t * channels,uint32_t * rate)408 static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
409 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
410 // AMR NB audio is always mono, 8kHz
411 *channels = 1;
412 *rate = 8000;
413 return true;
414 } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
415 // AMR WB audio is always mono, 16kHz
416 *channels = 1;
417 *rate = 16000;
418 return true;
419 }
420 return false;
421 }
422
MPEG4Extractor(DataSourceHelper * source,const char * mime)423 MPEG4Extractor::MPEG4Extractor(DataSourceHelper *source, const char *mime)
424 : mMoofOffset(0),
425 mMoofFound(false),
426 mMdatFound(false),
427 mDataSource(source),
428 mInitCheck(NO_INIT),
429 mHeaderTimescale(0),
430 mIsQT(false),
431 mIsHeif(false),
432 mHasMoovBox(false),
433 mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
434 mIsAvif(false),
435 mFirstTrack(NULL),
436 mLastTrack(NULL) {
437 ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif);
438 mFileMetaData = AMediaFormat_new();
439 }
440
~MPEG4Extractor()441 MPEG4Extractor::~MPEG4Extractor() {
442 Track *track = mFirstTrack;
443 while (track) {
444 Track *next = track->next;
445
446 delete track;
447 track = next;
448 }
449 mFirstTrack = mLastTrack = NULL;
450
451 for (size_t i = 0; i < mPssh.size(); i++) {
452 delete [] mPssh[i].data;
453 }
454 mPssh.clear();
455
456 delete mDataSource;
457 AMediaFormat_delete(mFileMetaData);
458 }
459
flags() const460 uint32_t MPEG4Extractor::flags() const {
461 return CAN_PAUSE |
462 ((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
463 (CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
464 }
465
getMetaData(AMediaFormat * meta)466 media_status_t MPEG4Extractor::getMetaData(AMediaFormat *meta) {
467 status_t err;
468 if ((err = readMetaData()) != OK) {
469 return AMEDIA_ERROR_UNKNOWN;
470 }
471 AMediaFormat_copy(meta, mFileMetaData);
472 return AMEDIA_OK;
473 }
474
countTracks()475 size_t MPEG4Extractor::countTracks() {
476 status_t err;
477 if ((err = readMetaData()) != OK) {
478 ALOGV("MPEG4Extractor::countTracks: no tracks");
479 return 0;
480 }
481
482 size_t n = 0;
483 Track *track = mFirstTrack;
484 while (track) {
485 ++n;
486 track = track->next;
487 }
488
489 ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
490 return n;
491 }
492
getTrackMetaData(AMediaFormat * meta,size_t index,uint32_t flags)493 media_status_t MPEG4Extractor::getTrackMetaData(
494 AMediaFormat *meta,
495 size_t index, uint32_t flags) {
496 status_t err;
497 if ((err = readMetaData()) != OK) {
498 return AMEDIA_ERROR_UNKNOWN;
499 }
500
501 Track *track = mFirstTrack;
502 while (index > 0) {
503 if (track == NULL) {
504 return AMEDIA_ERROR_UNKNOWN;
505 }
506
507 track = track->next;
508 --index;
509 }
510
511 if (track == NULL) {
512 return AMEDIA_ERROR_UNKNOWN;
513 }
514
515 [=] {
516 int64_t duration;
517 int32_t samplerate;
518 // Only for audio track.
519 if (track->elst_needs_processing && mHeaderTimescale != 0 &&
520 AMediaFormat_getInt64(track->meta, AMEDIAFORMAT_KEY_DURATION, &duration) &&
521 AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &samplerate)) {
522 // Elst has to be processed only the first time this function is called.
523 track->elst_needs_processing = false;
524
525 if (track->elst_segment_duration > INT64_MAX) {
526 return;
527 }
528 int64_t segment_duration = track->elst_segment_duration;
529 int64_t media_time = track->elst_media_time;
530 int64_t halfscale = track->timescale / 2;
531
532 ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64
533 ", halfscale = %" PRId64 ", mdhd_timescale = %d, track_timescale = %u",
534 segment_duration, media_time,
535 halfscale, mHeaderTimescale, track->timescale);
536
537 if ((uint32_t)samplerate != track->timescale){
538 ALOGV("samplerate:%" PRId32 ", track->timescale and samplerate are different!",
539 samplerate);
540 }
541 // Both delay and paddingsamples have to be set inorder for either to be
542 // effective in the lower layers.
543 int64_t delay = 0;
544 if (media_time > 0) { // Gapless playback
545 // delay = ((media_time * samplerate) + halfscale) / track->timescale;
546 if (__builtin_mul_overflow(media_time, samplerate, &delay) ||
547 __builtin_add_overflow(delay, halfscale, &delay) ||
548 (delay /= track->timescale, false) ||
549 delay > INT32_MAX ||
550 delay < INT32_MIN) {
551 ALOGW("ignoring edit list with bogus values");
552 return;
553 }
554 }
555 ALOGV("delay = %" PRId64, delay);
556 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
557
558 int64_t paddingsamples = 0;
559 if (segment_duration > 0) {
560 int64_t scaled_duration;
561 // scaled_duration = duration * mHeaderTimescale;
562 if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) {
563 return;
564 }
565 ALOGV("scaled_duration = %" PRId64, scaled_duration);
566
567 int64_t segment_end;
568 int64_t padding;
569 int64_t segment_duration_e6;
570 int64_t media_time_scaled_e6;
571 int64_t media_time_scaled;
572 // padding = scaled_duration - ((segment_duration * 1000000) +
573 // ((media_time * mHeaderTimescale * 1000000)/track->timescale) )
574 // segment_duration is based on timescale in movie header box(mdhd)
575 // media_time is based on timescale track header/media timescale
576 if (__builtin_mul_overflow(segment_duration, 1000000, &segment_duration_e6) ||
577 __builtin_mul_overflow(media_time, mHeaderTimescale, &media_time_scaled) ||
578 __builtin_mul_overflow(media_time_scaled, 1000000, &media_time_scaled_e6)) {
579 return;
580 }
581 media_time_scaled_e6 /= track->timescale;
582 if (__builtin_add_overflow(segment_duration_e6, media_time_scaled_e6, &segment_end)
583 || __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
584 return;
585 }
586 ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding);
587 // track duration from media header (which is what AMEDIAFORMAT_KEY_DURATION is)
588 // might be slightly shorter than the segment duration, which would make the
589 // padding negative. Clamp to zero.
590 if (padding > 0) {
591 int64_t halfscale_mht = mHeaderTimescale / 2;
592 int64_t halfscale_e6;
593 int64_t timescale_e6;
594 // paddingsamples = ((padding * samplerate) + (halfscale_mht * 1000000))
595 // / (mHeaderTimescale * 1000000);
596 if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
597 __builtin_mul_overflow(halfscale_mht, 1000000, &halfscale_e6) ||
598 __builtin_mul_overflow(mHeaderTimescale, 1000000, ×cale_e6) ||
599 __builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
600 (paddingsamples /= timescale_e6, false) ||
601 paddingsamples > INT32_MAX) {
602 return;
603 }
604 }
605 }
606 ALOGV("paddingsamples = %" PRId64, paddingsamples);
607 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_PADDING, paddingsamples);
608 }
609 }();
610
611 if ((flags & kIncludeExtensiveMetaData)
612 && !track->includes_expensive_metadata) {
613 track->includes_expensive_metadata = true;
614
615 const char *mime;
616 CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
617 if (!strncasecmp("video/", mime, 6)) {
618 // MPEG2 tracks do not provide CSD, so read the stream header
619 if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
620 off64_t offset;
621 size_t size;
622 if (track->sampleTable->getMetaDataForSample(
623 0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
624 if (size > kMaxTrackHeaderSize) {
625 size = kMaxTrackHeaderSize;
626 }
627 uint8_t header[kMaxTrackHeaderSize];
628 if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
629 AMediaFormat_setBuffer(track->meta,
630 AMEDIAFORMAT_KEY_MPEG2_STREAM_HEADER, header, size);
631 }
632 }
633 }
634
635 if (mMoofOffset > 0) {
636 int64_t duration;
637 if (AMediaFormat_getInt64(track->meta,
638 AMEDIAFORMAT_KEY_DURATION, &duration)) {
639 // nothing fancy, just pick a frame near 1/4th of the duration
640 AMediaFormat_setInt64(track->meta,
641 AMEDIAFORMAT_KEY_THUMBNAIL_TIME, duration / 4);
642 }
643 } else {
644 uint32_t sampleIndex;
645 uint64_t sampleTime;
646 if (track->timescale != 0 &&
647 track->sampleTable->findThumbnailSample(&sampleIndex) == OK
648 && track->sampleTable->getMetaDataForSample(
649 sampleIndex, NULL /* offset */, NULL /* size */,
650 &sampleTime) == OK) {
651 AMediaFormat_setInt64(track->meta,
652 AMEDIAFORMAT_KEY_THUMBNAIL_TIME,
653 ((int64_t)sampleTime * 1000000) / track->timescale);
654 }
655 }
656 }
657 }
658
659 return AMediaFormat_copy(meta, track->meta);
660 }
661
readMetaData()662 status_t MPEG4Extractor::readMetaData() {
663 if (mInitCheck != NO_INIT) {
664 return mInitCheck;
665 }
666
667 off64_t offset = 0;
668 status_t err;
669 bool sawMoovOrSidx = false;
670
671 while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
672 (mIsHeif && (mPreferHeif || !mHasMoovBox) &&
673 (mItemTable != NULL) && mItemTable->isValid()))) {
674 off64_t orig_offset = offset;
675 err = parseChunk(&offset, 0);
676
677 if (err != OK && err != UNKNOWN_ERROR) {
678 break;
679 } else if (offset <= orig_offset) {
680 // only continue parsing if the offset was advanced,
681 // otherwise we might end up in an infinite loop
682 ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
683 err = ERROR_MALFORMED;
684 break;
685 } else if (err == UNKNOWN_ERROR) {
686 sawMoovOrSidx = true;
687 }
688 }
689
690 if ((mIsAvif || mIsHeif) && (mItemTable != NULL) && (mItemTable->countImages() > 0)) {
691 off64_t exifOffset;
692 size_t exifSize;
693 if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) {
694 AMediaFormat_setInt64(mFileMetaData,
695 AMEDIAFORMAT_KEY_EXIF_OFFSET, (int64_t)exifOffset);
696 AMediaFormat_setInt64(mFileMetaData,
697 AMEDIAFORMAT_KEY_EXIF_SIZE, (int64_t)exifSize);
698 }
699 off64_t xmpOffset;
700 size_t xmpSize;
701 if (mItemTable->getXmpOffsetAndSize(&xmpOffset, &xmpSize) == OK) {
702 // TODO(chz): b/175717339
703 // Use a hard-coded string here instead of named keys. The keys are available
704 // only on API 31+. The mp4 extractor is part of mainline and has min_sdk_version
705 // of 29. This hard-coded string can be replaced with the named constant once
706 // the mp4 extractor is built against API 31+.
707 AMediaFormat_setInt64(mFileMetaData,
708 "xmp-offset" /*AMEDIAFORMAT_KEY_XMP_OFFSET*/, (int64_t)xmpOffset);
709 AMediaFormat_setInt64(mFileMetaData,
710 "xmp-size" /*AMEDIAFORMAT_KEY_XMP_SIZE*/, (int64_t)xmpSize);
711 }
712 for (uint32_t imageIndex = 0;
713 imageIndex < mItemTable->countImages(); imageIndex++) {
714 AMediaFormat *meta = mItemTable->getImageMeta(imageIndex);
715 if (meta == NULL) {
716 ALOGE("heif image %u has no meta!", imageIndex);
717 continue;
718 }
719 // Some heif files advertise image sequence brands (eg. 'hevc') in
720 // ftyp box, but don't have any valid tracks in them. Instead of
721 // reporting the entire file as malformed, we override the error
722 // to allow still images to be extracted.
723 if (err != OK) {
724 ALOGW("Extracting still images only");
725 err = OK;
726 }
727 mInitCheck = OK;
728
729 ALOGV("adding %s image track %u", mIsHeif ? "HEIF" : "AVIF", imageIndex);
730 Track *track = new Track;
731 if (mLastTrack != NULL) {
732 mLastTrack->next = track;
733 } else {
734 mFirstTrack = track;
735 }
736 mLastTrack = track;
737
738 track->meta = meta;
739 AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, imageIndex);
740 track->timescale = 1000000;
741 }
742 }
743
744 if (mInitCheck == OK) {
745 if (findTrackByMimePrefix("video/") != NULL) {
746 AMediaFormat_setString(mFileMetaData,
747 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_MPEG4);
748 } else if (findTrackByMimePrefix("audio/") != NULL) {
749 AMediaFormat_setString(mFileMetaData,
750 AMEDIAFORMAT_KEY_MIME, "audio/mp4");
751 } else if (findTrackByMimePrefix(
752 MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
753 AMediaFormat_setString(mFileMetaData,
754 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_HEIF);
755 } else if (findTrackByMimePrefix(
756 MEDIA_MIMETYPE_IMAGE_AVIF) != NULL) {
757 AMediaFormat_setString(mFileMetaData,
758 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_IMAGE_AVIF);
759 } else {
760 AMediaFormat_setString(mFileMetaData,
761 AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
762 }
763 } else {
764 mInitCheck = err;
765 }
766
767 CHECK_NE(err, (status_t)NO_INIT);
768
769 // copy pssh data into file metadata
770 uint64_t psshsize = 0;
771 for (size_t i = 0; i < mPssh.size(); i++) {
772 psshsize += 20 + mPssh[i].datalen;
773 }
774 if (psshsize > 0 && psshsize <= UINT32_MAX) {
775 char *buf = (char*)malloc(psshsize);
776 if (!buf) {
777 ALOGE("b/28471206");
778 return NO_MEMORY;
779 }
780 char *ptr = buf;
781 for (size_t i = 0; i < mPssh.size(); i++) {
782 memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
783 memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
784 ptr += (20 + mPssh[i].datalen);
785 }
786 AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_PSSH, buf, psshsize);
787 free(buf);
788 }
789
790 return mInitCheck;
791 }
792
793 struct PathAdder {
PathAdderandroid::PathAdder794 PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
795 : mPath(path) {
796 mPath->push(chunkType);
797 }
798
~PathAdderandroid::PathAdder799 ~PathAdder() {
800 mPath->pop();
801 }
802
803 private:
804 Vector<uint32_t> *mPath;
805
806 PathAdder(const PathAdder &);
807 PathAdder &operator=(const PathAdder &);
808 };
809
underMetaDataPath(const Vector<uint32_t> & path)810 static bool underMetaDataPath(const Vector<uint32_t> &path) {
811 return path.size() >= 5
812 && path[0] == FOURCC("moov")
813 && path[1] == FOURCC("udta")
814 && path[2] == FOURCC("meta")
815 && path[3] == FOURCC("ilst");
816 }
817
underQTMetaPath(const Vector<uint32_t> & path,int32_t depth)818 static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
819 return path.size() >= 2
820 && path[0] == FOURCC("moov")
821 && path[1] == FOURCC("meta")
822 && (depth == 2
823 || (depth == 3
824 && (path[2] == FOURCC("hdlr")
825 || path[2] == FOURCC("ilst")
826 || path[2] == FOURCC("keys"))));
827 }
828
829 // Given a time in seconds since Jan 1 1904, produce a human-readable string.
convertTimeToDate(int64_t time_1904,String8 * s)830 static bool convertTimeToDate(int64_t time_1904, String8 *s) {
831 // delta between mpeg4 time and unix epoch time
832 static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
833 if (time_1904 < INT64_MIN + delta) {
834 return false;
835 }
836 time_t time_1970 = time_1904 - delta;
837
838 char tmp[32];
839 struct tm* tm = gmtime(&time_1970);
840 if (tm != NULL &&
841 strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
842 s->setTo(tmp);
843 return true;
844 }
845 return false;
846 }
847
parseChunk(off64_t * offset,int depth)848 status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
849 ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
850
851 if (*offset < 0) {
852 ALOGE("b/23540914");
853 return ERROR_MALFORMED;
854 }
855 if (depth > 100) {
856 ALOGE("b/27456299");
857 return ERROR_MALFORMED;
858 }
859 uint32_t hdr[2];
860 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
861 return ERROR_IO;
862 }
863 uint64_t chunk_size = ntohl(hdr[0]);
864 int32_t chunk_type = ntohl(hdr[1]);
865 off64_t data_offset = *offset + 8;
866
867 if (chunk_size == 1) {
868 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
869 return ERROR_IO;
870 }
871 chunk_size = ntoh64(chunk_size);
872 data_offset += 8;
873
874 if (chunk_size < 16) {
875 // The smallest valid chunk is 16 bytes long in this case.
876 return ERROR_MALFORMED;
877 }
878 } else if (chunk_size == 0) {
879 if (depth == 0) {
880 // atom extends to end of file
881 off64_t sourceSize;
882 if (mDataSource->getSize(&sourceSize) == OK) {
883 chunk_size = (sourceSize - *offset);
884 } else {
885 // XXX could we just pick a "sufficiently large" value here?
886 ALOGE("atom size is 0, and data source has no size");
887 return ERROR_MALFORMED;
888 }
889 } else {
890 // not allowed for non-toplevel atoms, skip it
891 *offset += 4;
892 return OK;
893 }
894 } else if (chunk_size < 8) {
895 // The smallest valid chunk is 8 bytes long.
896 ALOGE("invalid chunk size: %" PRIu64, chunk_size);
897 return ERROR_MALFORMED;
898 }
899
900 char chunk[5];
901 MakeFourCCString(chunk_type, chunk);
902 ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
903
904 if (kUseHexDump) {
905 static const char kWhitespace[] = " ";
906 const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
907 printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
908
909 char buffer[256];
910 size_t n = chunk_size;
911 if (n > sizeof(buffer)) {
912 n = sizeof(buffer);
913 }
914 if (mDataSource->readAt(*offset, buffer, n)
915 < (ssize_t)n) {
916 return ERROR_IO;
917 }
918
919 hexdump(buffer, n);
920 }
921
922 PathAdder autoAdder(&mPath, chunk_type);
923
924 // (data_offset - *offset) is either 8 or 16
925 off64_t chunk_data_size = chunk_size - (data_offset - *offset);
926 if (chunk_data_size < 0) {
927 ALOGE("b/23540914");
928 return ERROR_MALFORMED;
929 }
930 if (chunk_type != FOURCC("mdat") && chunk_data_size > kMaxAtomSize) {
931 char errMsg[100];
932 sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size);
933 ALOGE("%s (b/28615448)", errMsg);
934 android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg));
935 return ERROR_MALFORMED;
936 }
937
938 if (chunk_type != FOURCC("cprt")
939 && chunk_type != FOURCC("covr")
940 && mPath.size() == 5 && underMetaDataPath(mPath)) {
941 off64_t stop_offset = *offset + chunk_size;
942 *offset = data_offset;
943 while (*offset < stop_offset) {
944 status_t err = parseChunk(offset, depth + 1);
945 if (err != OK) {
946 return err;
947 }
948 }
949
950 if (*offset != stop_offset) {
951 return ERROR_MALFORMED;
952 }
953
954 return OK;
955 }
956
957 switch(chunk_type) {
958 case FOURCC("moov"):
959 case FOURCC("trak"):
960 case FOURCC("mdia"):
961 case FOURCC("minf"):
962 case FOURCC("dinf"):
963 case FOURCC("stbl"):
964 case FOURCC("mvex"):
965 case FOURCC("moof"):
966 case FOURCC("traf"):
967 case FOURCC("mfra"):
968 case FOURCC("udta"):
969 case FOURCC("ilst"):
970 case FOURCC("sinf"):
971 case FOURCC("schi"):
972 case FOURCC("edts"):
973 case FOURCC("wave"):
974 {
975 if (chunk_type == FOURCC("moov") && depth != 0) {
976 ALOGE("moov: depth %d", depth);
977 return ERROR_MALFORMED;
978 }
979
980 if (chunk_type == FOURCC("moov") && mInitCheck == OK) {
981 ALOGE("duplicate moov");
982 return ERROR_MALFORMED;
983 }
984
985 if (chunk_type == FOURCC("moof") && !mMoofFound) {
986 // store the offset of the first segment
987 mMoofFound = true;
988 mMoofOffset = *offset;
989 }
990
991 if (chunk_type == FOURCC("stbl")) {
992 ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
993
994 if (mDataSource->flags()
995 & (DataSourceBase::kWantsPrefetching
996 | DataSourceBase::kIsCachingDataSource)) {
997 CachedRangedDataSource *cachedSource =
998 new CachedRangedDataSource(mDataSource);
999
1000 if (cachedSource->setCachedRange(
1001 *offset, chunk_size,
1002 true /* assume ownership on success */) == OK) {
1003 mDataSource = cachedSource;
1004 } else {
1005 delete cachedSource;
1006 }
1007 }
1008
1009 if (mLastTrack == NULL) {
1010 return ERROR_MALFORMED;
1011 }
1012
1013 mLastTrack->sampleTable = new SampleTable(mDataSource);
1014 }
1015
1016 bool isTrack = false;
1017 if (chunk_type == FOURCC("trak")) {
1018 if (depth != 1) {
1019 ALOGE("trak: depth %d", depth);
1020 return ERROR_MALFORMED;
1021 }
1022 isTrack = true;
1023
1024 ALOGV("adding new track");
1025 Track *track = new Track;
1026 if (mLastTrack) {
1027 mLastTrack->next = track;
1028 } else {
1029 mFirstTrack = track;
1030 }
1031 mLastTrack = track;
1032
1033 track->meta = AMediaFormat_new();
1034 AMediaFormat_setString(track->meta,
1035 AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
1036 }
1037
1038 off64_t stop_offset = *offset + chunk_size;
1039 *offset = data_offset;
1040 while (*offset < stop_offset) {
1041
1042 // pass udata terminate
1043 if (mIsQT && stop_offset - *offset == 4 && chunk_type == FOURCC("udta")) {
1044 // handle the case that udta terminates with terminate code x00000000
1045 // note that 0 terminator is optional and we just handle this case.
1046 uint32_t terminate_code = 1;
1047 mDataSource->readAt(*offset, &terminate_code, 4);
1048 if (0 == terminate_code) {
1049 *offset += 4;
1050 ALOGD("Terminal code for udta");
1051 continue;
1052 } else {
1053 ALOGW("invalid udta Terminal code");
1054 }
1055 }
1056
1057 status_t err = parseChunk(offset, depth + 1);
1058 if (err != OK) {
1059 if (isTrack) {
1060 mLastTrack->skipTrack = true;
1061 break;
1062 }
1063 return err;
1064 }
1065 }
1066
1067 if (*offset != stop_offset) {
1068 return ERROR_MALFORMED;
1069 }
1070
1071 if (isTrack) {
1072 int32_t trackId;
1073 // There must be exactly one track header per track.
1074
1075 if (!AMediaFormat_getInt32(mLastTrack->meta,
1076 AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
1077 mLastTrack->skipTrack = true;
1078 }
1079
1080 status_t err = verifyTrack(mLastTrack);
1081 if (err != OK) {
1082 mLastTrack->skipTrack = true;
1083 }
1084
1085
1086 if (mLastTrack->skipTrack) {
1087 ALOGV("skipping this track...");
1088 Track *cur = mFirstTrack;
1089
1090 if (cur == mLastTrack) {
1091 delete cur;
1092 mFirstTrack = mLastTrack = NULL;
1093 } else {
1094 while (cur && cur->next != mLastTrack) {
1095 cur = cur->next;
1096 }
1097 if (cur) {
1098 cur->next = NULL;
1099 }
1100 delete mLastTrack;
1101 mLastTrack = cur;
1102 }
1103
1104 return OK;
1105 }
1106
1107 // place things we built elsewhere into their final locations
1108
1109 // put aggregated tx3g data into the metadata
1110 if (mLastTrack->mTx3gFilled > 0) {
1111 ALOGV("Putting %zu bytes of tx3g data into meta data",
1112 mLastTrack->mTx3gFilled);
1113 AMediaFormat_setBuffer(mLastTrack->meta,
1114 AMEDIAFORMAT_KEY_TEXT_FORMAT_DATA,
1115 mLastTrack->mTx3gBuffer, mLastTrack->mTx3gFilled);
1116 // drop it now to reduce our footprint
1117 free(mLastTrack->mTx3gBuffer);
1118 mLastTrack->mTx3gBuffer = NULL;
1119 mLastTrack->mTx3gFilled = 0;
1120 mLastTrack->mTx3gSize = 0;
1121 }
1122
1123 const char *mime;
1124 AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime);
1125
1126 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
1127 void *data;
1128 size_t size;
1129
1130 if (AMediaFormat_getBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_2,
1131 &data, &size)
1132 && size >= 5) {
1133 const uint8_t *ptr = (const uint8_t *)data;
1134 const uint8_t profile = ptr[2] >> 1;
1135 const uint8_t bl_compatibility_id = (ptr[4]) >> 4;
1136 bool create_two_tracks = false;
1137
1138 if (bl_compatibility_id && bl_compatibility_id != 15) {
1139 create_two_tracks = true;
1140 }
1141
1142 if (4 == profile || 7 == profile ||
1143 (profile >= 8 && profile < 11 && create_two_tracks)) {
1144 // we need a backward compatible track
1145 ALOGV("Adding new backward compatible track");
1146 Track *track_b = new Track;
1147
1148 track_b->timescale = mLastTrack->timescale;
1149 track_b->sampleTable = mLastTrack->sampleTable;
1150 track_b->includes_expensive_metadata = mLastTrack->includes_expensive_metadata;
1151 track_b->skipTrack = mLastTrack->skipTrack;
1152 track_b->elst_needs_processing = mLastTrack->elst_needs_processing;
1153 track_b->elst_media_time = mLastTrack->elst_media_time;
1154 track_b->elst_segment_duration = mLastTrack->elst_segment_duration;
1155 track_b->elst_shift_start_ticks = mLastTrack->elst_shift_start_ticks;
1156 track_b->elst_initial_empty_edit_ticks = mLastTrack->elst_initial_empty_edit_ticks;
1157 track_b->subsample_encryption = mLastTrack->subsample_encryption;
1158
1159 track_b->mTx3gBuffer = mLastTrack->mTx3gBuffer;
1160 track_b->mTx3gSize = mLastTrack->mTx3gSize;
1161 track_b->mTx3gFilled = mLastTrack->mTx3gFilled;
1162
1163 track_b->meta = AMediaFormat_new();
1164 AMediaFormat_copy(track_b->meta, mLastTrack->meta);
1165
1166 mLastTrack->next = track_b;
1167 track_b->next = NULL;
1168
1169 // we want to remove the csd-2 key from the metadata, but
1170 // don't have an AMediaFormat_* function to do so. Settle
1171 // for replacing this csd-2 with an empty csd-2.
1172 uint8_t emptybuffer[8] = {};
1173 AMediaFormat_setBuffer(track_b->meta, AMEDIAFORMAT_KEY_CSD_2,
1174 emptybuffer, 0);
1175
1176 if (4 == profile || 7 == profile || 8 == profile ) {
1177 AMediaFormat_setString(track_b->meta,
1178 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_HEVC);
1179 } else if (9 == profile) {
1180 AMediaFormat_setString(track_b->meta,
1181 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_AVC);
1182 } else if (10 == profile) {
1183 AMediaFormat_setString(track_b->meta,
1184 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_AV1);
1185 } // Should never get to else part
1186
1187 mLastTrack = track_b;
1188 }
1189 }
1190 }
1191 } else if (chunk_type == FOURCC("moov")) {
1192 mInitCheck = OK;
1193
1194 return UNKNOWN_ERROR; // Return a generic error.
1195 }
1196 break;
1197 }
1198
1199 case FOURCC("schm"):
1200 {
1201
1202 *offset += chunk_size;
1203 if (!mLastTrack) {
1204 return ERROR_MALFORMED;
1205 }
1206
1207 uint32_t scheme_type;
1208 if (mDataSource->readAt(data_offset + 4, &scheme_type, 4) < 4) {
1209 return ERROR_IO;
1210 }
1211 scheme_type = ntohl(scheme_type);
1212 int32_t mode = kCryptoModeUnencrypted;
1213 switch(scheme_type) {
1214 case FOURCC("cbc1"):
1215 {
1216 mode = kCryptoModeAesCbc;
1217 break;
1218 }
1219 case FOURCC("cbcs"):
1220 {
1221 mode = kCryptoModeAesCbc;
1222 mLastTrack->subsample_encryption = true;
1223 break;
1224 }
1225 case FOURCC("cenc"):
1226 {
1227 mode = kCryptoModeAesCtr;
1228 break;
1229 }
1230 case FOURCC("cens"):
1231 {
1232 mode = kCryptoModeAesCtr;
1233 mLastTrack->subsample_encryption = true;
1234 break;
1235 }
1236 }
1237 if (mode != kCryptoModeUnencrypted) {
1238 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mode);
1239 }
1240 break;
1241 }
1242
1243
1244 case FOURCC("elst"):
1245 {
1246 *offset += chunk_size;
1247
1248 if (!mLastTrack) {
1249 return ERROR_MALFORMED;
1250 }
1251
1252 // See 14496-12 8.6.6
1253 uint8_t version;
1254 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
1255 return ERROR_IO;
1256 }
1257
1258 uint32_t entry_count;
1259 if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
1260 return ERROR_IO;
1261 }
1262
1263 if (entry_count > 2) {
1264 /* We support a single entry for gapless playback or negating offset for
1265 * reordering B frames, two entries (empty edit) for start offset at the moment.
1266 */
1267 ALOGW("ignoring edit list with %d entries", entry_count);
1268 } else {
1269 off64_t entriesoffset = data_offset + 8;
1270 uint64_t segment_duration;
1271 int64_t media_time;
1272 bool empty_edit_present = false;
1273 for (int i = 0; i < entry_count; ++i) {
1274 switch (version) {
1275 case 0: {
1276 uint32_t sd;
1277 int32_t mt;
1278 if (!mDataSource->getUInt32(entriesoffset, &sd) ||
1279 !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
1280 return ERROR_IO;
1281 }
1282 segment_duration = sd;
1283 media_time = mt;
1284 // 4(segment duration) + 4(media time) + 4(media rate)
1285 entriesoffset += 12;
1286 break;
1287 }
1288 case 1: {
1289 if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
1290 !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
1291 return ERROR_IO;
1292 }
1293 // 8(segment duration) + 8(media time) + 4(media rate)
1294 entriesoffset += 20;
1295 break;
1296 }
1297 default:
1298 return ERROR_IO;
1299 break;
1300 }
1301 // Empty edit entry would have to be first entry.
1302 if (media_time == -1 && i == 0) {
1303 empty_edit_present = true;
1304 ALOGV("initial empty edit ticks: %" PRIu64, segment_duration);
1305 /* In movie header timescale, and needs to be converted to media timescale
1306 * after we get that from a track's 'mdhd' atom,
1307 * which at times come after 'elst'.
1308 */
1309 mLastTrack->elst_initial_empty_edit_ticks = segment_duration;
1310 } else if (media_time >= 0 && i == 0) {
1311 ALOGV("first edit list entry - from gapless playback files");
1312 mLastTrack->elst_media_time = media_time;
1313 mLastTrack->elst_segment_duration = segment_duration;
1314 ALOGV("segment_duration: %" PRIu64 " media_time: %" PRId64,
1315 segment_duration, media_time);
1316 // media_time is in media timescale as are STTS/CTTS entries.
1317 mLastTrack->elst_shift_start_ticks = media_time;
1318 } else if (empty_edit_present && i == 1) {
1319 // Process second entry only when the first entry was an empty edit entry.
1320 ALOGV("second edit list entry");
1321 mLastTrack->elst_shift_start_ticks = media_time;
1322 } else {
1323 ALOGW("for now, unsupported entry in edit list %" PRIu32, entry_count);
1324 }
1325 }
1326 // save these for later, because the elst atom might precede
1327 // the atoms that actually gives us the duration and sample rate
1328 // needed to calculate the padding and delay values
1329 mLastTrack->elst_needs_processing = true;
1330 }
1331 break;
1332 }
1333
1334 case FOURCC("frma"):
1335 {
1336 *offset += chunk_size;
1337
1338 uint32_t original_fourcc;
1339 if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
1340 return ERROR_IO;
1341 }
1342 original_fourcc = ntohl(original_fourcc);
1343 ALOGV("read original format: %d", original_fourcc);
1344
1345 if (mLastTrack == NULL) {
1346 return ERROR_MALFORMED;
1347 }
1348
1349 AMediaFormat_setString(mLastTrack->meta,
1350 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(original_fourcc));
1351 uint32_t num_channels = 0;
1352 uint32_t sample_rate = 0;
1353 if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
1354 AMediaFormat_setInt32(mLastTrack->meta,
1355 AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1356 AMediaFormat_setInt32(mLastTrack->meta,
1357 AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1358 }
1359
1360 if (!mIsQT && original_fourcc == FOURCC("alac")) {
1361 off64_t tmpOffset = *offset;
1362 status_t err = parseALACSampleEntry(&tmpOffset);
1363 if (err != OK) {
1364 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1365 return err;
1366 }
1367 *offset = tmpOffset + 8;
1368 }
1369
1370 break;
1371 }
1372
1373 case FOURCC("tenc"):
1374 {
1375 *offset += chunk_size;
1376
1377 if (chunk_size < 32) {
1378 return ERROR_MALFORMED;
1379 }
1380
1381 // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
1382 // default IV size, 16 bytes default KeyID
1383 // (ISO 23001-7)
1384
1385 uint8_t version;
1386 if (mDataSource->readAt(data_offset, &version, sizeof(version))
1387 < (ssize_t)sizeof(version)) {
1388 return ERROR_IO;
1389 }
1390
1391 uint8_t buf[4];
1392 memset(buf, 0, 4);
1393 if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
1394 return ERROR_IO;
1395 }
1396
1397 if (mLastTrack == NULL) {
1398 return ERROR_MALFORMED;
1399 }
1400
1401 uint8_t defaultEncryptedByteBlock = 0;
1402 uint8_t defaultSkipByteBlock = 0;
1403 uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
1404 if (version == 1) {
1405 uint32_t pattern = buf[2];
1406 defaultEncryptedByteBlock = pattern >> 4;
1407 defaultSkipByteBlock = pattern & 0xf;
1408 if (defaultEncryptedByteBlock == 0 && defaultSkipByteBlock == 0) {
1409 // use (1,0) to mean "encrypt everything"
1410 defaultEncryptedByteBlock = 1;
1411 }
1412 } else if (mLastTrack->subsample_encryption) {
1413 ALOGW("subsample_encryption should be version 1");
1414 } else if (defaultAlgorithmId > 1) {
1415 // only 0 (clear) and 1 (AES-128) are valid
1416 ALOGW("defaultAlgorithmId: %u is a reserved value", defaultAlgorithmId);
1417 defaultAlgorithmId = 1;
1418 }
1419
1420 memset(buf, 0, 4);
1421 if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
1422 return ERROR_IO;
1423 }
1424 uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
1425
1426 if (defaultAlgorithmId == 0 && defaultIVSize != 0) {
1427 // only unencrypted data must have 0 IV size
1428 return ERROR_MALFORMED;
1429 } else if (defaultIVSize != 0 &&
1430 defaultIVSize != 8 &&
1431 defaultIVSize != 16) {
1432 return ERROR_MALFORMED;
1433 }
1434
1435 uint8_t defaultKeyId[16];
1436
1437 if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1438 return ERROR_IO;
1439 }
1440
1441 sp<ABuffer> defaultConstantIv;
1442 if (defaultAlgorithmId != 0 && defaultIVSize == 0) {
1443
1444 uint8_t ivlength;
1445 if (mDataSource->readAt(data_offset + 24, &ivlength, sizeof(ivlength))
1446 < (ssize_t)sizeof(ivlength)) {
1447 return ERROR_IO;
1448 }
1449
1450 if (ivlength != 8 && ivlength != 16) {
1451 ALOGW("unsupported IV length: %u", ivlength);
1452 return ERROR_MALFORMED;
1453 }
1454
1455 defaultConstantIv = new ABuffer(ivlength);
1456 if (mDataSource->readAt(data_offset + 25, defaultConstantIv->data(), ivlength)
1457 < (ssize_t)ivlength) {
1458 return ERROR_IO;
1459 }
1460
1461 defaultConstantIv->setRange(0, ivlength);
1462 }
1463
1464 int32_t tmpAlgorithmId;
1465 if (!AMediaFormat_getInt32(mLastTrack->meta,
1466 AMEDIAFORMAT_KEY_CRYPTO_MODE, &tmpAlgorithmId)) {
1467 AMediaFormat_setInt32(mLastTrack->meta,
1468 AMEDIAFORMAT_KEY_CRYPTO_MODE, defaultAlgorithmId);
1469 }
1470
1471 AMediaFormat_setInt32(mLastTrack->meta,
1472 AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, defaultIVSize);
1473 AMediaFormat_setBuffer(mLastTrack->meta,
1474 AMEDIAFORMAT_KEY_CRYPTO_KEY, defaultKeyId, 16);
1475 AMediaFormat_setInt32(mLastTrack->meta,
1476 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, defaultEncryptedByteBlock);
1477 AMediaFormat_setInt32(mLastTrack->meta,
1478 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, defaultSkipByteBlock);
1479 if (defaultConstantIv != NULL) {
1480 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_IV,
1481 defaultConstantIv->data(), defaultConstantIv->size());
1482 }
1483 break;
1484 }
1485
1486 case FOURCC("tkhd"):
1487 {
1488 *offset += chunk_size;
1489
1490 status_t err;
1491 if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1492 return err;
1493 }
1494
1495 break;
1496 }
1497
1498 case FOURCC("tref"):
1499 {
1500 off64_t stop_offset = *offset + chunk_size;
1501 *offset = data_offset;
1502 while (*offset < stop_offset) {
1503 status_t err = parseChunk(offset, depth + 1);
1504 if (err != OK) {
1505 return err;
1506 }
1507 }
1508 if (*offset != stop_offset) {
1509 return ERROR_MALFORMED;
1510 }
1511 break;
1512 }
1513
1514 case FOURCC("thmb"):
1515 {
1516 *offset += chunk_size;
1517
1518 if (mLastTrack != NULL) {
1519 // Skip thumbnail track for now since we don't have an
1520 // API to retrieve it yet.
1521 // The thumbnail track can't be accessed by negative index or time,
1522 // because each timed sample has its own corresponding thumbnail
1523 // in the thumbnail track. We'll need a dedicated API to retrieve
1524 // thumbnail at time instead.
1525 mLastTrack->skipTrack = true;
1526 }
1527
1528 break;
1529 }
1530
1531 case FOURCC("pssh"):
1532 {
1533 *offset += chunk_size;
1534
1535 PsshInfo pssh;
1536
1537 if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1538 return ERROR_IO;
1539 }
1540
1541 uint32_t psshdatalen = 0;
1542 if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
1543 return ERROR_IO;
1544 }
1545 pssh.datalen = ntohl(psshdatalen);
1546 ALOGV("pssh data size: %d", pssh.datalen);
1547 if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
1548 // pssh data length exceeds size of containing box
1549 return ERROR_MALFORMED;
1550 }
1551
1552 pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
1553 if (pssh.data == NULL) {
1554 return ERROR_MALFORMED;
1555 }
1556 ALOGV("allocated pssh @ %p", pssh.data);
1557 ssize_t requested = (ssize_t) pssh.datalen;
1558 if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
1559 delete[] pssh.data;
1560 return ERROR_IO;
1561 }
1562 mPssh.push_back(pssh);
1563
1564 break;
1565 }
1566
1567 case FOURCC("mdhd"):
1568 {
1569 *offset += chunk_size;
1570
1571 if (chunk_data_size < 4 || mLastTrack == NULL) {
1572 return ERROR_MALFORMED;
1573 }
1574
1575 uint8_t version;
1576 if (mDataSource->readAt(
1577 data_offset, &version, sizeof(version))
1578 < (ssize_t)sizeof(version)) {
1579 return ERROR_IO;
1580 }
1581
1582 off64_t timescale_offset;
1583
1584 if (version == 1) {
1585 timescale_offset = data_offset + 4 + 16;
1586 } else if (version == 0) {
1587 timescale_offset = data_offset + 4 + 8;
1588 } else {
1589 return ERROR_IO;
1590 }
1591
1592 uint32_t timescale;
1593 if (mDataSource->readAt(
1594 timescale_offset, ×cale, sizeof(timescale))
1595 < (ssize_t)sizeof(timescale)) {
1596 return ERROR_IO;
1597 }
1598
1599 if (!timescale) {
1600 ALOGE("timescale should not be ZERO.");
1601 return ERROR_MALFORMED;
1602 }
1603
1604 mLastTrack->timescale = ntohl(timescale);
1605
1606 // 14496-12 says all ones means indeterminate, but some files seem to use
1607 // 0 instead. We treat both the same.
1608 int64_t duration = 0;
1609 if (version == 1) {
1610 if (mDataSource->readAt(
1611 timescale_offset + 4, &duration, sizeof(duration))
1612 < (ssize_t)sizeof(duration)) {
1613 return ERROR_IO;
1614 }
1615 if (duration != -1) {
1616 duration = ntoh64(duration);
1617 }
1618 } else {
1619 uint32_t duration32;
1620 if (mDataSource->readAt(
1621 timescale_offset + 4, &duration32, sizeof(duration32))
1622 < (ssize_t)sizeof(duration32)) {
1623 return ERROR_IO;
1624 }
1625 if (duration32 != 0xffffffff) {
1626 duration = ntohl(duration32);
1627 }
1628 }
1629 if (duration != 0 && mLastTrack->timescale != 0) {
1630 long double durationUs = ((long double)duration * 1000000) / mLastTrack->timescale;
1631 if (durationUs < 0 || durationUs > INT64_MAX) {
1632 ALOGE("cannot represent %lld * 1000000 / %lld in 64 bits",
1633 (long long) duration, (long long) mLastTrack->timescale);
1634 return ERROR_MALFORMED;
1635 }
1636 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, durationUs);
1637 }
1638
1639 uint8_t lang[2];
1640 off64_t lang_offset;
1641 if (version == 1) {
1642 lang_offset = timescale_offset + 4 + 8;
1643 } else if (version == 0) {
1644 lang_offset = timescale_offset + 4 + 4;
1645 } else {
1646 return ERROR_IO;
1647 }
1648
1649 if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1650 < (ssize_t)sizeof(lang)) {
1651 return ERROR_IO;
1652 }
1653
1654 // To get the ISO-639-2/T three character language code
1655 // 1 bit pad followed by 3 5-bits characters. Each character
1656 // is packed as the difference between its ASCII value and 0x60.
1657 char lang_code[4];
1658 lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1659 lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1660 lang_code[2] = (lang[1] & 0x1f) + 0x60;
1661 lang_code[3] = '\0';
1662
1663 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_LANGUAGE, lang_code);
1664
1665 break;
1666 }
1667
1668 case FOURCC("stsd"):
1669 {
1670 uint8_t buffer[8];
1671 if (chunk_data_size < (off64_t)sizeof(buffer)) {
1672 return ERROR_MALFORMED;
1673 }
1674
1675 if (mDataSource->readAt(
1676 data_offset, buffer, 8) < 8) {
1677 return ERROR_IO;
1678 }
1679
1680 if (U32_AT(buffer) != 0) {
1681 // Should be version 0, flags 0.
1682 return ERROR_MALFORMED;
1683 }
1684
1685 uint32_t entry_count = U32_AT(&buffer[4]);
1686
1687 if (entry_count > 1) {
1688 // For 3GPP timed text, there could be multiple tx3g boxes contain
1689 // multiple text display formats. These formats will be used to
1690 // display the timed text.
1691 // For encrypted files, there may also be more than one entry.
1692 const char *mime;
1693
1694 if (mLastTrack == NULL)
1695 return ERROR_MALFORMED;
1696
1697 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
1698 if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1699 strcasecmp(mime, "application/octet-stream")) {
1700 // For now we only support a single type of media per track.
1701 mLastTrack->skipTrack = true;
1702 *offset += chunk_size;
1703 break;
1704 }
1705 }
1706 off64_t stop_offset = *offset + chunk_size;
1707 *offset = data_offset + 8;
1708 for (uint32_t i = 0; i < entry_count; ++i) {
1709 status_t err = parseChunk(offset, depth + 1);
1710 if (err != OK) {
1711 return err;
1712 }
1713 }
1714
1715 if (*offset != stop_offset) {
1716 return ERROR_MALFORMED;
1717 }
1718 break;
1719 }
1720 case FOURCC("mett"):
1721 {
1722 *offset += chunk_size;
1723
1724 // the absolute minimum size of a compliant mett box is 11 bytes:
1725 // 6 byte reserved, 2 byte index, null byte, one char mime_format, null byte
1726 // The resulting mime_format would be invalid at that size though.
1727 if (mLastTrack == NULL || chunk_data_size < 11) {
1728 return ERROR_MALFORMED;
1729 }
1730
1731 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
1732 if (buffer.get() == NULL) {
1733 return NO_MEMORY;
1734 }
1735
1736 if (mDataSource->readAt(
1737 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
1738 return ERROR_IO;
1739 }
1740
1741 // ISO-14496-12:
1742 // int8 reserved[6]; // should be all zeroes
1743 // int16_t data_reference_index;
1744 // char content_encoding[]; // null terminated, optional (= just the null byte)
1745 // char mime_format[]; // null terminated, mandatory
1746 // optional other boxes
1747 //
1748 // API < 29:
1749 // char mime_format[]; // null terminated
1750 //
1751 // API >= 29
1752 // char mime_format[]; // null terminated
1753 // char mime_format[]; // null terminated
1754
1755 // Prior to API 29, the metadata track was not compliant with ISO/IEC
1756 // 14496-12-2015. This led to some ISO-compliant parsers failing to read the
1757 // metatrack. As of API 29 and onwards, a change was made to metadata track to
1758 // make it somewhat compatible with the standard. The workaround is to write the
1759 // null-terminated mime_format string twice. This allows compliant parsers to
1760 // read the missing reserved, data_reference_index, and content_encoding fields
1761 // from the first mime_type string. The actual mime_format field would then be
1762 // read correctly from the second string. The non-compliant Android frameworks
1763 // from API 28 and earlier would still be able to read the mime_format correctly
1764 // as it would only read the first null-terminated mime_format string. To enable
1765 // reading metadata tracks generated from both the non-compliant and compliant
1766 // formats, a check needs to be done to see which format is used.
1767 const char *str = (const char*) buffer.get();
1768 size_t string_length = strnlen(str, chunk_data_size);
1769
1770 if (string_length == chunk_data_size - 1) {
1771 // This is likely a pre API 29 file, since it's a single null terminated
1772 // string filling the entire box.
1773 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, str);
1774 } else {
1775 // This might be a fully compliant metadata track, a "double mime" compatibility
1776 // track, or anything else, including a single non-terminated string, so we need
1777 // to determine the length of each string we want to parse out of the box.
1778 size_t encoding_length = strnlen(str + 8, chunk_data_size - 8);
1779 if (encoding_length + 8 >= chunk_data_size - 2) {
1780 // the encoding extends to the end of the box, so there's no mime_format
1781 return ERROR_MALFORMED;
1782 }
1783 String8 contentEncoding(str + 8, encoding_length);
1784 String8 mimeFormat(str + 8 + encoding_length + 1,
1785 chunk_data_size - 8 - encoding_length - 1);
1786 AMediaFormat_setString(mLastTrack->meta,
1787 AMEDIAFORMAT_KEY_MIME, mimeFormat.string());
1788 }
1789 break;
1790 }
1791
1792 case FOURCC("mp4a"):
1793 case FOURCC("enca"):
1794 case FOURCC("samr"):
1795 case FOURCC("sawb"):
1796 case FOURCC("Opus"):
1797 case FOURCC("twos"):
1798 case FOURCC("sowt"):
1799 case FOURCC("alac"):
1800 case FOURCC("fLaC"):
1801 case FOURCC(".mp3"):
1802 case 0x6D730055: // "ms U" mp3 audio
1803 case FOURCC("mha1"):
1804 case FOURCC("mhm1"):
1805 {
1806 if (mIsQT && depth >= 1 && mPath[depth - 1] == FOURCC("wave")) {
1807
1808 if (chunk_type == FOURCC("alac")) {
1809 off64_t offsetTmp = *offset;
1810 status_t err = parseALACSampleEntry(&offsetTmp);
1811 if (err != OK) {
1812 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1813 return err;
1814 }
1815 }
1816
1817 // Ignore all atoms embedded in QT wave atom
1818 ALOGV("Ignore all atoms embedded in QT wave atom");
1819 *offset += chunk_size;
1820 break;
1821 }
1822
1823 uint8_t buffer[8 + 20];
1824 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1825 // Basic AudioSampleEntry size.
1826 return ERROR_MALFORMED;
1827 }
1828
1829 if (mDataSource->readAt(
1830 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1831 return ERROR_IO;
1832 }
1833
1834 // we can get data_ref_index value from U16_AT(&buffer[6])
1835 uint16_t version = U16_AT(&buffer[8]);
1836 uint32_t num_channels = U16_AT(&buffer[16]);
1837
1838 uint16_t sample_size = U16_AT(&buffer[18]);
1839 uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1840
1841 if (mLastTrack == NULL)
1842 return ERROR_MALFORMED;
1843
1844 off64_t stop_offset = *offset + chunk_size;
1845 *offset = data_offset + sizeof(buffer);
1846
1847 if (mIsQT) {
1848 if (version == 1) {
1849 if (mDataSource->readAt(*offset, buffer, 16) < 16) {
1850 return ERROR_IO;
1851 }
1852
1853 #if 0
1854 U32_AT(buffer); // samples per packet
1855 U32_AT(&buffer[4]); // bytes per packet
1856 U32_AT(&buffer[8]); // bytes per frame
1857 U32_AT(&buffer[12]); // bytes per sample
1858 #endif
1859 *offset += 16;
1860 } else if (version == 2) {
1861 uint8_t v2buffer[36];
1862 if (mDataSource->readAt(*offset, v2buffer, 36) < 36) {
1863 return ERROR_IO;
1864 }
1865
1866 #if 0
1867 U32_AT(v2buffer); // size of struct only
1868 sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate
1869 num_channels = U32_AT(&v2buffer[12]); // num audio channels
1870 U32_AT(&v2buffer[16]); // always 0x7f000000
1871 sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel
1872 U32_AT(&v2buffer[24]); // format specifc flags
1873 U32_AT(&v2buffer[28]); // const bytes per audio packet
1874 U32_AT(&v2buffer[32]); // const LPCM frames per audio packet
1875 #endif
1876 *offset += 36;
1877 }
1878 }
1879
1880 if (chunk_type != FOURCC("enca")) {
1881 // if the chunk type is enca, we'll get the type from the frma box later
1882 AMediaFormat_setString(mLastTrack->meta,
1883 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
1884 AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1885
1886 if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_RAW, FourCC2MIME(chunk_type))) {
1887 AMediaFormat_setInt32(mLastTrack->meta,
1888 AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, sample_size);
1889 if (chunk_type == FOURCC("twos")) {
1890 AMediaFormat_setInt32(mLastTrack->meta,
1891 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, 1);
1892 }
1893 }
1894 }
1895 ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1896 chunk, num_channels, sample_size, sample_rate);
1897 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
1898 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
1899
1900 if (chunk_type == FOURCC("Opus")) {
1901 uint8_t opusInfo[AOPUS_OPUSHEAD_MAXSIZE];
1902 data_offset += sizeof(buffer);
1903 size_t opusInfoSize = chunk_data_size - sizeof(buffer);
1904
1905 if (opusInfoSize < AOPUS_OPUSHEAD_MINSIZE ||
1906 opusInfoSize > AOPUS_OPUSHEAD_MAXSIZE) {
1907 return ERROR_MALFORMED;
1908 }
1909 // Read Opus Header
1910 if (mDataSource->readAt(
1911 data_offset, opusInfo, opusInfoSize) < opusInfoSize) {
1912 return ERROR_IO;
1913 }
1914
1915 // OpusHeader must start with this magic sequence, overwrite first 8 bytes
1916 // http://wiki.xiph.org/OggOpus#ID_Header
1917 strncpy((char *)opusInfo, "OpusHead", 8);
1918
1919 // Version shall be 0 as per mp4 Opus Specific Box
1920 // (https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2)
1921 if (opusInfo[8]) {
1922 return ERROR_MALFORMED;
1923 }
1924 // Force version to 1 as per OpusHead definition
1925 // (http://wiki.xiph.org/OggOpus#ID_Header)
1926 opusInfo[8] = 1;
1927
1928 // Read Opus Specific Box values
1929 size_t opusOffset = 10;
1930 uint16_t pre_skip = U16_AT(&opusInfo[opusOffset]);
1931 uint32_t sample_rate = U32_AT(&opusInfo[opusOffset + 2]);
1932 uint16_t out_gain = U16_AT(&opusInfo[opusOffset + 6]);
1933
1934 // Convert Opus Specific Box values. ParseOpusHeader expects
1935 // the values in LE, however MP4 stores these values as BE
1936 // https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2
1937 memcpy(&opusInfo[opusOffset], &pre_skip, sizeof(pre_skip));
1938 memcpy(&opusInfo[opusOffset + 2], &sample_rate, sizeof(sample_rate));
1939 memcpy(&opusInfo[opusOffset + 6], &out_gain, sizeof(out_gain));
1940
1941 static const int64_t kSeekPreRollNs = 80000000; // Fixed 80 msec
1942 static const int32_t kOpusSampleRate = 48000;
1943 int64_t codecDelay = pre_skip * 1000000000ll / kOpusSampleRate;
1944
1945 AMediaFormat_setBuffer(mLastTrack->meta,
1946 AMEDIAFORMAT_KEY_CSD_0, opusInfo, opusInfoSize);
1947 AMediaFormat_setBuffer(mLastTrack->meta,
1948 AMEDIAFORMAT_KEY_CSD_1, &codecDelay, sizeof(codecDelay));
1949 AMediaFormat_setBuffer(mLastTrack->meta,
1950 AMEDIAFORMAT_KEY_CSD_2, &kSeekPreRollNs, sizeof(kSeekPreRollNs));
1951
1952 data_offset += opusInfoSize;
1953 *offset = data_offset;
1954 CHECK_EQ(*offset, stop_offset);
1955 }
1956
1957 if (!mIsQT && chunk_type == FOURCC("alac")) {
1958 data_offset += sizeof(buffer);
1959
1960 status_t err = parseALACSampleEntry(&data_offset);
1961 if (err != OK) {
1962 ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
1963 return err;
1964 }
1965 *offset = data_offset;
1966 CHECK_EQ(*offset, stop_offset);
1967 }
1968
1969 if (chunk_type == FOURCC("fLaC")) {
1970
1971 // From https://github.com/xiph/flac/blob/master/doc/isoflac.txt
1972 // 4 for mime, 4 for blockType and BlockLen, 34 for metadata
1973 uint8_t flacInfo[4 + 4 + 34];
1974 // skipping dFla, version
1975 data_offset += sizeof(buffer) + 12;
1976 size_t flacOffset = 4;
1977 // Add flaC header mime type to CSD
1978 strncpy((char *)flacInfo, "fLaC", 4);
1979 if (mDataSource->readAt(
1980 data_offset, flacInfo + flacOffset, sizeof(flacInfo) - flacOffset) <
1981 (ssize_t)sizeof(flacInfo) - flacOffset) {
1982 return ERROR_IO;
1983 }
1984 data_offset += sizeof(flacInfo) - flacOffset;
1985
1986 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_0, flacInfo,
1987 sizeof(flacInfo));
1988 *offset = data_offset;
1989 CHECK_EQ(*offset, stop_offset);
1990 }
1991
1992 while (*offset < stop_offset) {
1993 status_t err = parseChunk(offset, depth + 1);
1994 if (err != OK) {
1995 return err;
1996 }
1997 }
1998
1999 if (*offset != stop_offset) {
2000 return ERROR_MALFORMED;
2001 }
2002 break;
2003 }
2004 case FOURCC("mhaC"):
2005 {
2006 // See ISO_IEC_23008-3;2019 MHADecoderConfigurationRecord
2007 constexpr uint32_t mhac_header_size = 4 /* size */ + 4 /* boxtype 'mhaC' */
2008 + 1 /* configurationVersion */ + 1 /* mpegh3daProfileLevelIndication */
2009 + 1 /* referenceChannelLayout */ + 2 /* mpegh3daConfigLength */;
2010 uint8_t mhac_header[mhac_header_size];
2011 off64_t data_offset = *offset;
2012
2013 if (chunk_size < sizeof(mhac_header)) {
2014 return ERROR_MALFORMED;
2015 }
2016
2017 if (mDataSource->readAt(data_offset, mhac_header, sizeof(mhac_header))
2018 < (ssize_t)sizeof(mhac_header)) {
2019 return ERROR_IO;
2020 }
2021
2022 //get mpegh3daProfileLevelIndication
2023 const uint32_t mpegh3daProfileLevelIndication = mhac_header[9];
2024 AMediaFormat_setInt32(mLastTrack->meta,
2025 AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION,
2026 mpegh3daProfileLevelIndication);
2027
2028 //get referenceChannelLayout
2029 const uint32_t referenceChannelLayout = mhac_header[10];
2030 AMediaFormat_setInt32(mLastTrack->meta,
2031 AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT,
2032 referenceChannelLayout);
2033
2034 // get mpegh3daConfigLength
2035 const uint32_t mhac_config_size = U16_AT(&mhac_header[11]);
2036 if (chunk_size != sizeof(mhac_header) + mhac_config_size) {
2037 return ERROR_MALFORMED;
2038 }
2039
2040 data_offset += sizeof(mhac_header);
2041 uint8_t mhac_config[mhac_config_size];
2042 if (mDataSource->readAt(data_offset, mhac_config, sizeof(mhac_config))
2043 < (ssize_t)sizeof(mhac_config)) {
2044 return ERROR_IO;
2045 }
2046
2047 AMediaFormat_setBuffer(mLastTrack->meta,
2048 AMEDIAFORMAT_KEY_CSD_0, mhac_config, sizeof(mhac_config));
2049 data_offset += sizeof(mhac_config);
2050 *offset = data_offset;
2051 break;
2052 }
2053 case FOURCC("mhaP"):
2054 {
2055 // FDAmd_2 of ISO_IEC_23008-3;2019 MHAProfileAndLevelCompatibilitySetBox
2056 constexpr uint32_t mhap_header_size = 4 /* size */ + 4 /* boxtype 'mhaP' */
2057 + 1 /* numCompatibleSets */;
2058
2059 uint8_t mhap_header[mhap_header_size];
2060 off64_t data_offset = *offset;
2061
2062 if (chunk_size < (ssize_t)mhap_header_size) {
2063 return ERROR_MALFORMED;
2064 }
2065
2066 if (mDataSource->readAt(data_offset, mhap_header, sizeof(mhap_header))
2067 < (ssize_t)sizeof(mhap_header)) {
2068 return ERROR_IO;
2069 }
2070
2071 // mhap_compatible_sets_size = numCompatibleSets * sizeof(uint8_t)
2072 const uint32_t mhap_compatible_sets_size = mhap_header[8];
2073 if (chunk_size != sizeof(mhap_header) + mhap_compatible_sets_size) {
2074 return ERROR_MALFORMED;
2075 }
2076
2077 data_offset += sizeof(mhap_header);
2078 uint8_t mhap_compatible_sets[mhap_compatible_sets_size];
2079 if (mDataSource->readAt(
2080 data_offset, mhap_compatible_sets, sizeof(mhap_compatible_sets))
2081 < (ssize_t)sizeof(mhap_compatible_sets)) {
2082 return ERROR_IO;
2083 }
2084
2085 AMediaFormat_setBuffer(mLastTrack->meta,
2086 AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS,
2087 mhap_compatible_sets, sizeof(mhap_compatible_sets));
2088 data_offset += sizeof(mhap_compatible_sets);
2089 *offset = data_offset;
2090 break;
2091 }
2092 case FOURCC("mp4v"):
2093 case FOURCC("encv"):
2094 case FOURCC("s263"):
2095 case FOURCC("H263"):
2096 case FOURCC("h263"):
2097 case FOURCC("avc1"):
2098 case FOURCC("hvc1"):
2099 case FOURCC("hev1"):
2100 case FOURCC("dvav"):
2101 case FOURCC("dva1"):
2102 case FOURCC("dvhe"):
2103 case FOURCC("dvh1"):
2104 case FOURCC("dav1"):
2105 case FOURCC("av01"):
2106 case FOURCC("vp09"):
2107 {
2108 uint8_t buffer[78];
2109 if (chunk_data_size < (ssize_t)sizeof(buffer)) {
2110 // Basic VideoSampleEntry size.
2111 return ERROR_MALFORMED;
2112 }
2113
2114 if (mDataSource->readAt(
2115 data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
2116 return ERROR_IO;
2117 }
2118
2119 // we can get data_ref_index value from U16_AT(&buffer[6])
2120 uint16_t width = U16_AT(&buffer[6 + 18]);
2121 uint16_t height = U16_AT(&buffer[6 + 20]);
2122
2123 // The video sample is not standard-compliant if it has invalid dimension.
2124 // Use some default width and height value, and
2125 // let the decoder figure out the actual width and height (and thus
2126 // be prepared for INFO_FOMRAT_CHANGED event).
2127 if (width == 0) width = 352;
2128 if (height == 0) height = 288;
2129
2130 // printf("*** coding='%s' width=%d height=%d\n",
2131 // chunk, width, height);
2132
2133 if (mLastTrack == NULL)
2134 return ERROR_MALFORMED;
2135
2136 if (chunk_type != FOURCC("encv")) {
2137 // if the chunk type is encv, we'll get the type from the frma box later
2138 AMediaFormat_setString(mLastTrack->meta,
2139 AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
2140 }
2141 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_WIDTH, width);
2142 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_HEIGHT, height);
2143
2144 off64_t stop_offset = *offset + chunk_size;
2145 *offset = data_offset + sizeof(buffer);
2146 while (*offset < stop_offset) {
2147 status_t err = parseChunk(offset, depth + 1);
2148 if (err != OK) {
2149 return err;
2150 }
2151 }
2152
2153 if (*offset != stop_offset) {
2154 return ERROR_MALFORMED;
2155 }
2156 break;
2157 }
2158
2159 case FOURCC("stco"):
2160 case FOURCC("co64"):
2161 {
2162 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
2163 return ERROR_MALFORMED;
2164 }
2165
2166 status_t err =
2167 mLastTrack->sampleTable->setChunkOffsetParams(
2168 chunk_type, data_offset, chunk_data_size);
2169
2170 *offset += chunk_size;
2171
2172 if (err != OK) {
2173 return err;
2174 }
2175
2176 break;
2177 }
2178
2179 case FOURCC("stsc"):
2180 {
2181 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2182 return ERROR_MALFORMED;
2183
2184 status_t err =
2185 mLastTrack->sampleTable->setSampleToChunkParams(
2186 data_offset, chunk_data_size);
2187
2188 *offset += chunk_size;
2189
2190 if (err != OK) {
2191 return err;
2192 }
2193
2194 break;
2195 }
2196
2197 case FOURCC("stsz"):
2198 case FOURCC("stz2"):
2199 {
2200 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
2201 return ERROR_MALFORMED;
2202 }
2203
2204 status_t err =
2205 mLastTrack->sampleTable->setSampleSizeParams(
2206 chunk_type, data_offset, chunk_data_size);
2207
2208 *offset += chunk_size;
2209
2210 if (err != OK) {
2211 return err;
2212 }
2213
2214 adjustRawDefaultFrameSize();
2215
2216 size_t max_size;
2217 err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
2218
2219 if (err != OK) {
2220 return err;
2221 }
2222
2223 if (max_size != 0) {
2224 // Assume that a given buffer only contains at most 10 chunks,
2225 // each chunk originally prefixed with a 2 byte length will
2226 // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
2227 // and thus will grow by 2 bytes per chunk.
2228 if (max_size > SIZE_MAX - 10 * 2) {
2229 ALOGE("max sample size too big: %zu", max_size);
2230 return ERROR_MALFORMED;
2231 }
2232 AMediaFormat_setInt32(mLastTrack->meta,
2233 AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size + 10 * 2);
2234 } else {
2235 // No size was specified. Pick a conservatively large size.
2236 uint32_t width, height;
2237 if (!AMediaFormat_getInt32(mLastTrack->meta,
2238 AMEDIAFORMAT_KEY_WIDTH, (int32_t*)&width) ||
2239 !AMediaFormat_getInt32(mLastTrack->meta,
2240 AMEDIAFORMAT_KEY_HEIGHT,(int32_t*) &height)) {
2241 ALOGE("No width or height, assuming worst case 1080p");
2242 width = 1920;
2243 height = 1080;
2244 } else {
2245 // A resolution was specified, check that it's not too big. The values below
2246 // were chosen so that the calculations below don't cause overflows, they're
2247 // not indicating that resolutions up to 32kx32k are actually supported.
2248 if (width > 32768 || height > 32768) {
2249 ALOGE("can't support %u x %u video", width, height);
2250 return ERROR_MALFORMED;
2251 }
2252 }
2253
2254 const char *mime;
2255 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
2256 if (!strncmp(mime, "audio/", 6)) {
2257 // for audio, use 128KB
2258 max_size = 1024 * 128;
2259 } else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
2260 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
2261 || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
2262 // AVC & HEVC requires compression ratio of at least 2, and uses
2263 // macroblocks
2264 max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
2265 } else {
2266 // For all other formats there is no minimum compression
2267 // ratio. Use compression ratio of 1.
2268 max_size = width * height * 3 / 2;
2269 }
2270 // HACK: allow 10% overhead
2271 // TODO: read sample size from traf atom for fragmented MPEG4.
2272 max_size += max_size / 10;
2273 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size);
2274 }
2275
2276 // NOTE: setting another piece of metadata invalidates any pointers (such as the
2277 // mimetype) previously obtained, so don't cache them.
2278 const char *mime;
2279 CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
2280 // Calculate average frame rate.
2281 if (!strncasecmp("video/", mime, 6)) {
2282 size_t nSamples = mLastTrack->sampleTable->countSamples();
2283 if (nSamples == 0) {
2284 int32_t trackId;
2285 if (AMediaFormat_getInt32(mLastTrack->meta,
2286 AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
2287 for (size_t i = 0; i < mTrex.size(); i++) {
2288 Trex *t = &mTrex.editItemAt(i);
2289 if (t->track_ID == (uint32_t) trackId) {
2290 if (t->default_sample_duration > 0) {
2291 int32_t frameRate =
2292 mLastTrack->timescale / t->default_sample_duration;
2293 AMediaFormat_setInt32(mLastTrack->meta,
2294 AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2295 }
2296 break;
2297 }
2298 }
2299 }
2300 } else {
2301 int64_t durationUs;
2302 if (AMediaFormat_getInt64(mLastTrack->meta,
2303 AMEDIAFORMAT_KEY_DURATION, &durationUs)) {
2304 if (durationUs > 0) {
2305 int32_t frameRate = (nSamples * 1000000LL +
2306 (durationUs >> 1)) / durationUs;
2307 AMediaFormat_setInt32(mLastTrack->meta,
2308 AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
2309 }
2310 }
2311 ALOGV("setting frame count %zu", nSamples);
2312 AMediaFormat_setInt32(mLastTrack->meta,
2313 AMEDIAFORMAT_KEY_FRAME_COUNT, nSamples);
2314 }
2315 }
2316
2317 break;
2318 }
2319
2320 case FOURCC("stts"):
2321 {
2322 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2323 return ERROR_MALFORMED;
2324
2325 *offset += chunk_size;
2326
2327 if (depth >= 1 && mPath[depth - 1] != FOURCC("stbl")) {
2328 char chunk[5];
2329 MakeFourCCString(mPath[depth - 1], chunk);
2330 ALOGW("stts's parent box (%s) is not stbl, skip it.", chunk);
2331 break;
2332 }
2333
2334 status_t err =
2335 mLastTrack->sampleTable->setTimeToSampleParams(
2336 data_offset, chunk_data_size);
2337
2338 if (err != OK) {
2339 return err;
2340 }
2341
2342 break;
2343 }
2344
2345 case FOURCC("ctts"):
2346 {
2347 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2348 return ERROR_MALFORMED;
2349
2350 *offset += chunk_size;
2351
2352 status_t err =
2353 mLastTrack->sampleTable->setCompositionTimeToSampleParams(
2354 data_offset, chunk_data_size);
2355
2356 if (err != OK) {
2357 return err;
2358 }
2359
2360 break;
2361 }
2362
2363 case FOURCC("stss"):
2364 {
2365 if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
2366 return ERROR_MALFORMED;
2367
2368 *offset += chunk_size;
2369
2370 status_t err =
2371 mLastTrack->sampleTable->setSyncSampleParams(
2372 data_offset, chunk_data_size);
2373
2374 if (err != OK) {
2375 return err;
2376 }
2377
2378 break;
2379 }
2380
2381 // \xA9xyz
2382 case FOURCC("\251xyz"):
2383 {
2384 *offset += chunk_size;
2385
2386 // Best case the total data length inside "\xA9xyz" box would
2387 // be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/",
2388 // where "\x00\x05" is the text string length with value = 5,
2389 // "\0x15\xc7" is the language code = en, and "+0+0/" is a
2390 // location (string) value with longitude = 0 and latitude = 0.
2391 // Since some devices encountered in the wild omit the trailing
2392 // slash, we'll allow that.
2393 if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing /
2394 return ERROR_MALFORMED;
2395 }
2396
2397 uint16_t len;
2398 if (!mDataSource->getUInt16(data_offset, &len)) {
2399 return ERROR_IO;
2400 }
2401
2402 // allow "+0+0" without trailing slash
2403 if (len < 4 || len > chunk_data_size - 4) {
2404 return ERROR_MALFORMED;
2405 }
2406 // The location string following the language code is formatted
2407 // according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709).
2408 // Allocate 2 extra bytes, in case we need to add a trailing slash,
2409 // and to add a terminating 0.
2410 std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]());
2411 if (!buffer) {
2412 return NO_MEMORY;
2413 }
2414
2415 if (mDataSource->readAt(
2416 data_offset + 4, &buffer[0], len) < len) {
2417 return ERROR_IO;
2418 }
2419
2420 len = strlen(&buffer[0]);
2421 if (len < 4) {
2422 return ERROR_MALFORMED;
2423 }
2424 // Add a trailing slash if there wasn't one.
2425 if (buffer[len - 1] != '/') {
2426 buffer[len] = '/';
2427 }
2428 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_LOCATION, &buffer[0]);
2429 break;
2430 }
2431
2432 case FOURCC("esds"):
2433 {
2434 *offset += chunk_size;
2435
2436 if (chunk_data_size < 4) {
2437 return ERROR_MALFORMED;
2438 }
2439
2440 auto tmp = heapbuffer<uint8_t>(chunk_data_size);
2441 uint8_t *buffer = tmp.get();
2442 if (buffer == NULL) {
2443 return -ENOMEM;
2444 }
2445
2446 if (mDataSource->readAt(
2447 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2448 return ERROR_IO;
2449 }
2450
2451 if (U32_AT(buffer) != 0) {
2452 // Should be version 0, flags 0.
2453 return ERROR_MALFORMED;
2454 }
2455
2456 if (mLastTrack == NULL)
2457 return ERROR_MALFORMED;
2458
2459 AMediaFormat_setBuffer(mLastTrack->meta,
2460 AMEDIAFORMAT_KEY_ESDS, &buffer[4], chunk_data_size - 4);
2461
2462 if (mPath.size() >= 2
2463 && mPath[mPath.size() - 2] == FOURCC("mp4a")) {
2464 // Information from the ESDS must be relied on for proper
2465 // setup of sample rate and channel count for MPEG4 Audio.
2466 // The generic header appears to only contain generic
2467 // information...
2468
2469 status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
2470 &buffer[4], chunk_data_size - 4);
2471
2472 if (err != OK) {
2473 return err;
2474 }
2475 }
2476 if (mPath.size() >= 2
2477 && mPath[mPath.size() - 2] == FOURCC("mp4v")) {
2478 // Check if the video is MPEG2
2479 ESDS esds(&buffer[4], chunk_data_size - 4);
2480
2481 uint8_t objectTypeIndication;
2482 if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
2483 if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
2484 AMediaFormat_setString(mLastTrack->meta,
2485 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_MPEG2);
2486 }
2487 }
2488 }
2489 break;
2490 }
2491
2492 case FOURCC("btrt"):
2493 {
2494 *offset += chunk_size;
2495 if (mLastTrack == NULL) {
2496 return ERROR_MALFORMED;
2497 }
2498
2499 uint8_t buffer[12];
2500 if (chunk_data_size != sizeof(buffer)) {
2501 return ERROR_MALFORMED;
2502 }
2503
2504 if (mDataSource->readAt(
2505 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2506 return ERROR_IO;
2507 }
2508
2509 uint32_t maxBitrate = U32_AT(&buffer[4]);
2510 uint32_t avgBitrate = U32_AT(&buffer[8]);
2511 if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
2512 AMediaFormat_setInt32(mLastTrack->meta,
2513 AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
2514 }
2515 if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
2516 AMediaFormat_setInt32(mLastTrack->meta,
2517 AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
2518 }
2519 break;
2520 }
2521
2522 case FOURCC("avcC"):
2523 {
2524 *offset += chunk_size;
2525
2526 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2527
2528 if (buffer.get() == NULL) {
2529 ALOGE("b/28471206");
2530 return NO_MEMORY;
2531 }
2532
2533 if (mDataSource->readAt(
2534 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2535 return ERROR_IO;
2536 }
2537
2538 if (mLastTrack == NULL)
2539 return ERROR_MALFORMED;
2540
2541 AMediaFormat_setBuffer(mLastTrack->meta,
2542 AMEDIAFORMAT_KEY_CSD_AVC, buffer.get(), chunk_data_size);
2543
2544 break;
2545 }
2546 case FOURCC("hvcC"):
2547 {
2548 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2549
2550 if (buffer.get() == NULL) {
2551 ALOGE("b/28471206");
2552 return NO_MEMORY;
2553 }
2554
2555 if (mDataSource->readAt(
2556 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2557 return ERROR_IO;
2558 }
2559
2560 if (mLastTrack == NULL)
2561 return ERROR_MALFORMED;
2562
2563 AMediaFormat_setBuffer(mLastTrack->meta,
2564 AMEDIAFORMAT_KEY_CSD_HEVC, buffer.get(), chunk_data_size);
2565
2566 *offset += chunk_size;
2567 break;
2568 }
2569
2570 case FOURCC("vpcC"):
2571 case FOURCC("av1C"):
2572 {
2573 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2574
2575 if (buffer.get() == NULL) {
2576 ALOGE("b/28471206");
2577 return NO_MEMORY;
2578 }
2579
2580 if (mDataSource->readAt(
2581 data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2582 return ERROR_IO;
2583 }
2584
2585 if (mLastTrack == NULL)
2586 return ERROR_MALFORMED;
2587
2588 AMediaFormat_setBuffer(mLastTrack->meta,
2589 AMEDIAFORMAT_KEY_CSD_0, buffer.get(), chunk_data_size);
2590
2591 *offset += chunk_size;
2592 break;
2593 }
2594 case FOURCC("dvcC"):
2595 case FOURCC("dvvC"): {
2596
2597 if (chunk_data_size != 24) {
2598 return ERROR_MALFORMED;
2599 }
2600
2601 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2602
2603 if (buffer.get() == NULL) {
2604 ALOGE("b/28471206");
2605 return NO_MEMORY;
2606 }
2607
2608 if (mDataSource->readAt(data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
2609 return ERROR_IO;
2610 }
2611
2612 if (mLastTrack == NULL)
2613 return ERROR_MALFORMED;
2614
2615 AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_2,
2616 buffer.get(), chunk_data_size);
2617 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME,
2618 MEDIA_MIMETYPE_VIDEO_DOLBY_VISION);
2619
2620 *offset += chunk_size;
2621 break;
2622 }
2623 case FOURCC("d263"):
2624 {
2625 *offset += chunk_size;
2626 /*
2627 * d263 contains a fixed 7 bytes part:
2628 * vendor - 4 bytes
2629 * version - 1 byte
2630 * level - 1 byte
2631 * profile - 1 byte
2632 * optionally, "d263" box itself may contain a 16-byte
2633 * bit rate box (bitr)
2634 * average bit rate - 4 bytes
2635 * max bit rate - 4 bytes
2636 */
2637 char buffer[23];
2638 if (chunk_data_size != 7 &&
2639 chunk_data_size != 23) {
2640 ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
2641 return ERROR_MALFORMED;
2642 }
2643
2644 if (mDataSource->readAt(
2645 data_offset, buffer, chunk_data_size) < chunk_data_size) {
2646 return ERROR_IO;
2647 }
2648
2649 if (mLastTrack == NULL)
2650 return ERROR_MALFORMED;
2651
2652 AMediaFormat_setBuffer(mLastTrack->meta,
2653 AMEDIAFORMAT_KEY_D263, buffer, chunk_data_size);
2654
2655 break;
2656 }
2657
2658 case FOURCC("meta"):
2659 {
2660 off64_t stop_offset = *offset + chunk_size;
2661 *offset = data_offset;
2662 bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
2663 if (!isParsingMetaKeys) {
2664 uint8_t buffer[4];
2665 if (chunk_data_size < (off64_t)sizeof(buffer)) {
2666 *offset = stop_offset;
2667 return ERROR_MALFORMED;
2668 }
2669
2670 if (mDataSource->readAt(
2671 data_offset, buffer, 4) < 4) {
2672 *offset = stop_offset;
2673 return ERROR_IO;
2674 }
2675
2676 if (U32_AT(buffer) != 0) {
2677 // Should be version 0, flags 0.
2678
2679 // If it's not, let's assume this is one of those
2680 // apparently malformed chunks that don't have flags
2681 // and completely different semantics than what's
2682 // in the MPEG4 specs and skip it.
2683 *offset = stop_offset;
2684 return OK;
2685 }
2686 *offset += sizeof(buffer);
2687 }
2688
2689 while (*offset < stop_offset) {
2690 status_t err = parseChunk(offset, depth + 1);
2691 if (err != OK) {
2692 return err;
2693 }
2694 }
2695
2696 if (*offset != stop_offset) {
2697 return ERROR_MALFORMED;
2698 }
2699 break;
2700 }
2701
2702 case FOURCC("iloc"):
2703 case FOURCC("iinf"):
2704 case FOURCC("iprp"):
2705 case FOURCC("pitm"):
2706 case FOURCC("idat"):
2707 case FOURCC("iref"):
2708 case FOURCC("ipro"):
2709 {
2710 if (mIsHeif || mIsAvif) {
2711 if (mItemTable == NULL) {
2712 mItemTable = new ItemTable(mDataSource, mIsHeif);
2713 }
2714 status_t err = mItemTable->parse(
2715 chunk_type, data_offset, chunk_data_size);
2716 if (err != OK) {
2717 return err;
2718 }
2719 }
2720 *offset += chunk_size;
2721 break;
2722 }
2723
2724 case FOURCC("mean"):
2725 case FOURCC("name"):
2726 case FOURCC("data"):
2727 {
2728 *offset += chunk_size;
2729
2730 if (mPath.size() == 6 && underMetaDataPath(mPath)) {
2731 status_t err = parseITunesMetaData(data_offset, chunk_data_size);
2732
2733 if (err != OK) {
2734 return err;
2735 }
2736 }
2737
2738 break;
2739 }
2740
2741 case FOURCC("mvhd"):
2742 {
2743 *offset += chunk_size;
2744
2745 if (depth != 1) {
2746 ALOGE("mvhd: depth %d", depth);
2747 return ERROR_MALFORMED;
2748 }
2749 if (chunk_data_size < 32) {
2750 return ERROR_MALFORMED;
2751 }
2752
2753 uint8_t header[32];
2754 if (mDataSource->readAt(
2755 data_offset, header, sizeof(header))
2756 < (ssize_t)sizeof(header)) {
2757 return ERROR_IO;
2758 }
2759
2760 uint64_t creationTime;
2761 uint64_t duration = 0;
2762 if (header[0] == 1) {
2763 creationTime = U64_AT(&header[4]);
2764 mHeaderTimescale = U32_AT(&header[20]);
2765 duration = U64_AT(&header[24]);
2766 if (duration == 0xffffffffffffffff) {
2767 duration = 0;
2768 }
2769 } else if (header[0] != 0) {
2770 return ERROR_MALFORMED;
2771 } else {
2772 creationTime = U32_AT(&header[4]);
2773 mHeaderTimescale = U32_AT(&header[12]);
2774 uint32_t d32 = U32_AT(&header[16]);
2775 if (d32 == 0xffffffff) {
2776 d32 = 0;
2777 }
2778 duration = d32;
2779 }
2780 if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
2781 AMediaFormat_setInt64(mFileMetaData,
2782 AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2783 }
2784
2785 String8 s;
2786 if (convertTimeToDate(creationTime, &s)) {
2787 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DATE, s.string());
2788 }
2789
2790 break;
2791 }
2792
2793 case FOURCC("mehd"):
2794 {
2795 *offset += chunk_size;
2796
2797 if (chunk_data_size < 8) {
2798 return ERROR_MALFORMED;
2799 }
2800
2801 uint8_t flags[4];
2802 if (mDataSource->readAt(
2803 data_offset, flags, sizeof(flags))
2804 < (ssize_t)sizeof(flags)) {
2805 return ERROR_IO;
2806 }
2807
2808 uint64_t duration = 0;
2809 if (flags[0] == 1) {
2810 // 64 bit
2811 if (chunk_data_size < 12) {
2812 return ERROR_MALFORMED;
2813 }
2814 mDataSource->getUInt64(data_offset + 4, &duration);
2815 if (duration == 0xffffffffffffffff) {
2816 duration = 0;
2817 }
2818 } else if (flags[0] == 0) {
2819 // 32 bit
2820 uint32_t d32;
2821 mDataSource->getUInt32(data_offset + 4, &d32);
2822 if (d32 == 0xffffffff) {
2823 d32 = 0;
2824 }
2825 duration = d32;
2826 } else {
2827 return ERROR_MALFORMED;
2828 }
2829
2830 if (duration != 0 && mHeaderTimescale != 0) {
2831 AMediaFormat_setInt64(mFileMetaData,
2832 AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
2833 }
2834
2835 break;
2836 }
2837
2838 case FOURCC("mdat"):
2839 {
2840 mMdatFound = true;
2841
2842 *offset += chunk_size;
2843 break;
2844 }
2845
2846 case FOURCC("hdlr"):
2847 {
2848 *offset += chunk_size;
2849
2850 if (underQTMetaPath(mPath, 3)) {
2851 break;
2852 }
2853
2854 uint32_t buffer;
2855 if (mDataSource->readAt(
2856 data_offset + 8, &buffer, 4) < 4) {
2857 return ERROR_IO;
2858 }
2859
2860 uint32_t type = ntohl(buffer);
2861 // For the 3GPP file format, the handler-type within the 'hdlr' box
2862 // shall be 'text'. We also want to support 'sbtl' handler type
2863 // for a practical reason as various MPEG4 containers use it.
2864 if (type == FOURCC("text") || type == FOURCC("sbtl")) {
2865 if (mLastTrack != NULL) {
2866 AMediaFormat_setString(mLastTrack->meta,
2867 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_TEXT_3GPP);
2868 }
2869 }
2870
2871 break;
2872 }
2873
2874 case FOURCC("keys"):
2875 {
2876 *offset += chunk_size;
2877
2878 if (underQTMetaPath(mPath, 3)) {
2879 status_t err = parseQTMetaKey(data_offset, chunk_data_size);
2880 if (err != OK) {
2881 return err;
2882 }
2883 }
2884 break;
2885 }
2886
2887 case FOURCC("trex"):
2888 {
2889 *offset += chunk_size;
2890
2891 if (chunk_data_size < 24) {
2892 return ERROR_IO;
2893 }
2894 Trex trex;
2895 if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
2896 !mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
2897 !mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
2898 !mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
2899 !mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
2900 return ERROR_IO;
2901 }
2902 mTrex.add(trex);
2903 break;
2904 }
2905
2906 case FOURCC("tx3g"):
2907 {
2908 if (mLastTrack == NULL)
2909 return ERROR_MALFORMED;
2910
2911 // complain about ridiculous chunks
2912 if (chunk_size > kMaxAtomSize) {
2913 return ERROR_MALFORMED;
2914 }
2915
2916 // complain about empty atoms
2917 if (chunk_data_size <= 0) {
2918 ALOGE("b/124330204");
2919 android_errorWriteLog(0x534e4554, "124330204");
2920 return ERROR_MALFORMED;
2921 }
2922
2923 // should fill buffer based on "data_offset" and "chunk_data_size"
2924 // instead of *offset and chunk_size;
2925 // but we've been feeding the extra data to consumers for multiple releases and
2926 // if those apps are compensating for it, we'd break them with such a change
2927 //
2928
2929 if (mLastTrack->mTx3gBuffer == NULL) {
2930 mLastTrack->mTx3gSize = 0;
2931 mLastTrack->mTx3gFilled = 0;
2932 }
2933 if (mLastTrack->mTx3gSize - mLastTrack->mTx3gFilled < chunk_size) {
2934 size_t growth = kTx3gGrowth;
2935 if (growth < chunk_size) {
2936 growth = chunk_size;
2937 }
2938 // although this disallows 2 tx3g atoms of nearly kMaxAtomSize...
2939 if ((uint64_t) mLastTrack->mTx3gSize + growth > kMaxAtomSize) {
2940 ALOGE("b/124330204 - too much space");
2941 android_errorWriteLog(0x534e4554, "124330204");
2942 return ERROR_MALFORMED;
2943 }
2944 uint8_t *updated = (uint8_t *)realloc(mLastTrack->mTx3gBuffer,
2945 mLastTrack->mTx3gSize + growth);
2946 if (updated == NULL) {
2947 return ERROR_MALFORMED;
2948 }
2949 mLastTrack->mTx3gBuffer = updated;
2950 mLastTrack->mTx3gSize += growth;
2951 }
2952
2953 if ((size_t)(mDataSource->readAt(*offset,
2954 mLastTrack->mTx3gBuffer + mLastTrack->mTx3gFilled,
2955 chunk_size))
2956 < chunk_size) {
2957
2958 // advance read pointer so we don't end up reading this again
2959 *offset += chunk_size;
2960 return ERROR_IO;
2961 }
2962
2963 mLastTrack->mTx3gFilled += chunk_size;
2964 *offset += chunk_size;
2965 break;
2966 }
2967
2968 case FOURCC("covr"):
2969 {
2970 *offset += chunk_size;
2971
2972 ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
2973 chunk_data_size, data_offset);
2974
2975 if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
2976 return ERROR_MALFORMED;
2977 }
2978 auto buffer = heapbuffer<uint8_t>(chunk_data_size);
2979 if (buffer.get() == NULL) {
2980 ALOGE("b/28471206");
2981 return NO_MEMORY;
2982 }
2983 if (mDataSource->readAt(
2984 data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) {
2985 return ERROR_IO;
2986 }
2987 const int kSkipBytesOfDataBox = 16;
2988 if (chunk_data_size <= kSkipBytesOfDataBox) {
2989 return ERROR_MALFORMED;
2990 }
2991
2992 AMediaFormat_setBuffer(mFileMetaData,
2993 AMEDIAFORMAT_KEY_ALBUMART,
2994 buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
2995
2996 break;
2997 }
2998
2999 case FOURCC("colr"):
3000 {
3001 *offset += chunk_size;
3002 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
3003 // ignore otherwise
3004 if (depth >= 2 && mPath[depth - 2] == FOURCC("stsd")) {
3005 status_t err = parseColorInfo(data_offset, chunk_data_size);
3006 if (err != OK) {
3007 return err;
3008 }
3009 }
3010
3011 break;
3012 }
3013
3014 case FOURCC("pasp"):
3015 {
3016 *offset += chunk_size;
3017 // this must be in a VisualSampleEntry box under the Sample Description Box ('stsd')
3018 // ignore otherwise
3019 if (depth >= 2 && mPath[depth - 2] == FOURCC("stsd")) {
3020 status_t err = parsePaspBox(data_offset, chunk_data_size);
3021 if (err != OK) {
3022 return err;
3023 }
3024 }
3025
3026 break;
3027 }
3028
3029 case FOURCC("titl"):
3030 case FOURCC("perf"):
3031 case FOURCC("auth"):
3032 case FOURCC("gnre"):
3033 case FOURCC("albm"):
3034 case FOURCC("yrrc"):
3035 {
3036 *offset += chunk_size;
3037
3038 status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
3039
3040 if (err != OK) {
3041 return err;
3042 }
3043
3044 break;
3045 }
3046
3047 case FOURCC("ID32"):
3048 {
3049 *offset += chunk_size;
3050
3051 if (chunk_data_size < 6) {
3052 return ERROR_MALFORMED;
3053 }
3054
3055 parseID3v2MetaData(data_offset + 6, chunk_data_size - 6);
3056
3057 break;
3058 }
3059
3060 case FOURCC("----"):
3061 {
3062 mLastCommentMean.clear();
3063 mLastCommentName.clear();
3064 mLastCommentData.clear();
3065 *offset += chunk_size;
3066 break;
3067 }
3068
3069 case FOURCC("sidx"):
3070 {
3071 status_t err = parseSegmentIndex(data_offset, chunk_data_size);
3072 if (err != OK) {
3073 return err;
3074 }
3075 *offset += chunk_size;
3076 return UNKNOWN_ERROR; // stop parsing after sidx
3077 }
3078
3079 case FOURCC("ac-3"):
3080 {
3081 *offset += chunk_size;
3082 // bypass ac-3 if parse fail
3083 if (parseAC3SpecificBox(data_offset) != OK) {
3084 if (mLastTrack != NULL) {
3085 ALOGW("Fail to parse ac-3");
3086 mLastTrack->skipTrack = true;
3087 }
3088 }
3089 return OK;
3090 }
3091
3092 case FOURCC("ec-3"):
3093 {
3094 *offset += chunk_size;
3095 // bypass ec-3 if parse fail
3096 if (parseEAC3SpecificBox(data_offset) != OK) {
3097 if (mLastTrack != NULL) {
3098 ALOGW("Fail to parse ec-3");
3099 mLastTrack->skipTrack = true;
3100 }
3101 }
3102 return OK;
3103 }
3104
3105 case FOURCC("ac-4"):
3106 {
3107 *offset += chunk_size;
3108 // bypass ac-4 if parse fail
3109 if (parseAC4SpecificBox(data_offset) != OK) {
3110 if (mLastTrack != NULL) {
3111 ALOGW("Fail to parse ac-4");
3112 mLastTrack->skipTrack = true;
3113 }
3114 }
3115 return OK;
3116 }
3117
3118 case FOURCC("ftyp"):
3119 {
3120 if (chunk_data_size < 8 || depth != 0) {
3121 return ERROR_MALFORMED;
3122 }
3123
3124 off64_t stop_offset = *offset + chunk_size;
3125 uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4;
3126 std::set<uint32_t> brandSet;
3127 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
3128 if (i == 1) {
3129 // Skip this index, it refers to the minorVersion,
3130 // not a brand.
3131 continue;
3132 }
3133
3134 uint32_t brand;
3135 if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) {
3136 return ERROR_MALFORMED;
3137 }
3138
3139 brand = ntohl(brand);
3140 brandSet.insert(brand);
3141 }
3142
3143 if (brandSet.count(FOURCC("qt ")) > 0) {
3144 mIsQT = true;
3145 } else {
3146 if (brandSet.count(FOURCC("mif1")) > 0
3147 && brandSet.count(FOURCC("heic")) > 0) {
3148 ALOGV("identified HEIF image");
3149
3150 mIsHeif = true;
3151 brandSet.erase(FOURCC("mif1"));
3152 brandSet.erase(FOURCC("heic"));
3153 } else if (brandSet.count(FOURCC("avif")) > 0 ||
3154 brandSet.count(FOURCC("avis")) > 0) {
3155 ALOGV("identified AVIF image");
3156 mIsAvif = true;
3157 brandSet.erase(FOURCC("avif"));
3158 brandSet.erase(FOURCC("avis"));
3159 }
3160
3161 if (!brandSet.empty()) {
3162 // This means that the file should have moov box.
3163 // It could be any iso files (mp4, heifs, etc.)
3164 mHasMoovBox = true;
3165 if (mIsHeif || mIsAvif) {
3166 ALOGV("identified %s image with other tracks", mIsHeif ? "HEIF" : "AVIF");
3167 }
3168 }
3169 }
3170
3171 *offset = stop_offset;
3172
3173 break;
3174 }
3175
3176 default:
3177 {
3178 // check if we're parsing 'ilst' for meta keys
3179 // if so, treat type as a number (key-id).
3180 if (underQTMetaPath(mPath, 3)) {
3181 status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
3182 if (err != OK) {
3183 return err;
3184 }
3185 }
3186
3187 *offset += chunk_size;
3188 break;
3189 }
3190 }
3191
3192 return OK;
3193 }
3194
parseChannelCountSampleRate(off64_t * offset,uint16_t * channelCount,uint16_t * sampleRate)3195 status_t MPEG4Extractor::parseChannelCountSampleRate(
3196 off64_t *offset, uint16_t *channelCount, uint16_t *sampleRate) {
3197 // skip 16 bytes:
3198 // + 6-byte reserved,
3199 // + 2-byte data reference index,
3200 // + 8-byte reserved
3201 *offset += 16;
3202 if (!mDataSource->getUInt16(*offset, channelCount)) {
3203 ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read channel count");
3204 return ERROR_MALFORMED;
3205 }
3206 // skip 8 bytes:
3207 // + 2-byte channelCount,
3208 // + 2-byte sample size,
3209 // + 4-byte reserved
3210 *offset += 8;
3211 if (!mDataSource->getUInt16(*offset, sampleRate)) {
3212 ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read sample rate");
3213 return ERROR_MALFORMED;
3214 }
3215 // skip 4 bytes:
3216 // + 2-byte sampleRate,
3217 // + 2-byte reserved
3218 *offset += 4;
3219 return OK;
3220 }
3221
parseAC4SpecificBox(off64_t offset)3222 status_t MPEG4Extractor::parseAC4SpecificBox(off64_t offset) {
3223 if (mLastTrack == NULL) {
3224 return ERROR_MALFORMED;
3225 }
3226
3227 uint16_t sampleRate, channelCount;
3228 status_t status;
3229 if ((status = parseChannelCountSampleRate(&offset, &channelCount, &sampleRate)) != OK) {
3230 return status;
3231 }
3232 uint32_t size;
3233 // + 4-byte size
3234 // + 4-byte type
3235 // + 3-byte payload
3236 const uint32_t kAC4MinimumBoxSize = 4 + 4 + 3;
3237 if (!mDataSource->getUInt32(offset, &size) || size < kAC4MinimumBoxSize) {
3238 ALOGE("MPEG4Extractor: error while reading ac-4 block: cannot read specific box size");
3239 return ERROR_MALFORMED;
3240 }
3241
3242 // + 4-byte size
3243 offset += 4;
3244 uint32_t type;
3245 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac4")) {
3246 ALOGE("MPEG4Extractor: error while reading ac-4 specific block: header not dac4");
3247 return ERROR_MALFORMED;
3248 }
3249
3250 // + 4-byte type
3251 offset += 4;
3252 const uint32_t kAC4SpecificBoxPayloadSize = 1176;
3253 uint8_t chunk[kAC4SpecificBoxPayloadSize];
3254 ssize_t dsiSize = size - 8; // size of box - size and type fields
3255 if (dsiSize >= (ssize_t)kAC4SpecificBoxPayloadSize ||
3256 mDataSource->readAt(offset, chunk, dsiSize) != dsiSize) {
3257 ALOGE("MPEG4Extractor: error while reading ac-4 specific block: bitstream fields");
3258 return ERROR_MALFORMED;
3259 }
3260 // + size-byte payload
3261 offset += dsiSize;
3262 ABitReader br(chunk, dsiSize);
3263 AC4DSIParser parser(br);
3264 if (!parser.parse()){
3265 ALOGE("MPEG4Extractor: error while parsing ac-4 specific block");
3266 return ERROR_MALFORMED;
3267 }
3268
3269 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC4);
3270 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3271 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3272
3273 AudioPresentationCollection presentations;
3274 // translate the AC4 presentation information to audio presentations for this track
3275 AC4DSIParser::AC4Presentations ac4Presentations = parser.getPresentations();
3276 if (!ac4Presentations.empty()) {
3277 for (const auto& ac4Presentation : ac4Presentations) {
3278 auto& presentation = ac4Presentation.second;
3279 if (!presentation.mEnabled) {
3280 continue;
3281 }
3282 AudioPresentationV1 ap;
3283 ap.mPresentationId = presentation.mGroupIndex;
3284 ap.mProgramId = presentation.mProgramID;
3285 ap.mLanguage = presentation.mLanguage;
3286 if (presentation.mPreVirtualized) {
3287 ap.mMasteringIndication = MASTERED_FOR_HEADPHONE;
3288 } else {
3289 switch (presentation.mChannelMode) {
3290 case AC4Parser::AC4Presentation::kChannelMode_Mono:
3291 case AC4Parser::AC4Presentation::kChannelMode_Stereo:
3292 ap.mMasteringIndication = MASTERED_FOR_STEREO;
3293 break;
3294 case AC4Parser::AC4Presentation::kChannelMode_3_0:
3295 case AC4Parser::AC4Presentation::kChannelMode_5_0:
3296 case AC4Parser::AC4Presentation::kChannelMode_5_1:
3297 case AC4Parser::AC4Presentation::kChannelMode_7_0_34:
3298 case AC4Parser::AC4Presentation::kChannelMode_7_1_34:
3299 case AC4Parser::AC4Presentation::kChannelMode_7_0_52:
3300 case AC4Parser::AC4Presentation::kChannelMode_7_1_52:
3301 ap.mMasteringIndication = MASTERED_FOR_SURROUND;
3302 break;
3303 case AC4Parser::AC4Presentation::kChannelMode_7_0_322:
3304 case AC4Parser::AC4Presentation::kChannelMode_7_1_322:
3305 case AC4Parser::AC4Presentation::kChannelMode_7_0_4:
3306 case AC4Parser::AC4Presentation::kChannelMode_7_1_4:
3307 case AC4Parser::AC4Presentation::kChannelMode_9_0_4:
3308 case AC4Parser::AC4Presentation::kChannelMode_9_1_4:
3309 case AC4Parser::AC4Presentation::kChannelMode_22_2:
3310 ap.mMasteringIndication = MASTERED_FOR_3D;
3311 break;
3312 default:
3313 ALOGE("Invalid channel mode in AC4 presentation");
3314 return ERROR_MALFORMED;
3315 }
3316 }
3317
3318 ap.mAudioDescriptionAvailable = (presentation.mContentClassifier ==
3319 AC4Parser::AC4Presentation::kVisuallyImpaired);
3320 ap.mSpokenSubtitlesAvailable = (presentation.mContentClassifier ==
3321 AC4Parser::AC4Presentation::kVoiceOver);
3322 ap.mDialogueEnhancementAvailable = presentation.mHasDialogEnhancements;
3323 if (!ap.mLanguage.empty()) {
3324 ap.mLabels.emplace(ap.mLanguage, presentation.mDescription);
3325 }
3326 presentations.push_back(std::move(ap));
3327 }
3328 }
3329
3330 if (presentations.empty()) {
3331 // Clear audio presentation info in metadata.
3332 AMediaFormat_setBuffer(
3333 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO, nullptr, 0);
3334 } else {
3335 std::ostringstream outStream(std::ios::out);
3336 serializeAudioPresentations(presentations, &outStream);
3337 AMediaFormat_setBuffer(
3338 mLastTrack->meta, AMEDIAFORMAT_KEY_AUDIO_PRESENTATION_INFO,
3339 outStream.str().data(), outStream.str().size());
3340 }
3341 return OK;
3342 }
3343
parseEAC3SpecificBox(off64_t offset)3344 status_t MPEG4Extractor::parseEAC3SpecificBox(off64_t offset) {
3345 if (mLastTrack == NULL) {
3346 return ERROR_MALFORMED;
3347 }
3348
3349 uint16_t sampleRate, channels;
3350 status_t status;
3351 if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3352 return status;
3353 }
3354 uint32_t size;
3355 // + 4-byte size
3356 // + 4-byte type
3357 // + 3-byte payload
3358 const uint32_t kEAC3SpecificBoxMinSize = 11;
3359 // 13 + 3 + (8 * (2 + 5 + 5 + 3 + 1 + 3 + 4 + (14 * 9 + 1))) bits == 152 bytes theoretical max
3360 // calculated from the required bits read below as well as the maximum number of independent
3361 // and dependant sub streams you can have
3362 const uint32_t kEAC3SpecificBoxMaxSize = 152;
3363 if (!mDataSource->getUInt32(offset, &size) ||
3364 size < kEAC3SpecificBoxMinSize ||
3365 size > kEAC3SpecificBoxMaxSize) {
3366 ALOGE("MPEG4Extractor: error while reading eac-3 block: cannot read specific box size");
3367 return ERROR_MALFORMED;
3368 }
3369
3370 offset += 4;
3371 uint32_t type;
3372 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dec3")) {
3373 ALOGE("MPEG4Extractor: error while reading eac-3 specific block: header not dec3");
3374 return ERROR_MALFORMED;
3375 }
3376
3377 offset += 4;
3378 uint8_t* chunk = new (std::nothrow) uint8_t[size];
3379 if (chunk == NULL) {
3380 return ERROR_MALFORMED;
3381 }
3382
3383 if (mDataSource->readAt(offset, chunk, size) != (ssize_t)size) {
3384 ALOGE("MPEG4Extractor: error while reading eac-3 specific block: bitstream fields");
3385 delete[] chunk;
3386 return ERROR_MALFORMED;
3387 }
3388
3389 ABitReader br(chunk, size);
3390 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3391 static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3392
3393 if (br.numBitsLeft() < 16) {
3394 delete[] chunk;
3395 return ERROR_MALFORMED;
3396 }
3397 unsigned data_rate = br.getBits(13);
3398 ALOGV("EAC3 data rate = %d", data_rate);
3399
3400 unsigned num_ind_sub = br.getBits(3) + 1;
3401 ALOGV("EAC3 independant substreams = %d", num_ind_sub);
3402 if (br.numBitsLeft() < (num_ind_sub * 23)) {
3403 delete[] chunk;
3404 return ERROR_MALFORMED;
3405 }
3406
3407 unsigned channelCount = 0;
3408 for (unsigned i = 0; i < num_ind_sub; i++) {
3409 unsigned fscod = br.getBits(2);
3410 if (fscod == 3) {
3411 ALOGE("Incorrect fscod (3) in EAC3 header");
3412 delete[] chunk;
3413 return ERROR_MALFORMED;
3414 }
3415 unsigned boxSampleRate = sampleRateTable[fscod];
3416 if (boxSampleRate != sampleRate) {
3417 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3418 boxSampleRate, sampleRate);
3419 delete[] chunk;
3420 return ERROR_MALFORMED;
3421 }
3422
3423 unsigned bsid = br.getBits(5);
3424 if (bsid == 9 || bsid == 10) {
3425 ALOGW("EAC3 stream (bsid=%d) may be silenced by the decoder", bsid);
3426 } else if (bsid > 16) {
3427 ALOGE("EAC3 stream (bsid=%d) is not compatible with ETSI TS 102 366 v1.4.1", bsid);
3428 delete[] chunk;
3429 return ERROR_MALFORMED;
3430 }
3431
3432 // skip
3433 br.skipBits(2);
3434 unsigned bsmod = br.getBits(3);
3435 unsigned acmod = br.getBits(3);
3436 unsigned lfeon = br.getBits(1);
3437 // we currently only support the first stream
3438 if (i == 0)
3439 channelCount = channelCountTable[acmod] + lfeon;
3440 ALOGV("bsmod = %d, acmod = %d, lfeon = %d", bsmod, acmod, lfeon);
3441
3442 br.skipBits(3);
3443 unsigned num_dep_sub = br.getBits(4);
3444 ALOGV("EAC3 dependant substreams = %d", num_dep_sub);
3445 if (num_dep_sub != 0) {
3446 if (br.numBitsLeft() < 9) {
3447 delete[] chunk;
3448 return ERROR_MALFORMED;
3449 }
3450 static const char* chan_loc_tbl[] = { "Lc/Rc","Lrs/Rrs","Cs","Ts","Lsd/Rsd",
3451 "Lw/Rw","Lvh/Rvh","Cvh","Lfe2" };
3452 unsigned chan_loc = br.getBits(9);
3453 unsigned mask = 1;
3454 for (unsigned j = 0; j < 9; j++, mask <<= 1) {
3455 if ((chan_loc & mask) != 0) {
3456 // we currently only support the first stream
3457 if (i == 0) {
3458 channelCount++;
3459 // these are 2 channels in the mask
3460 if (j == 0 || j == 1 || j == 4 || j == 5 || j == 6) {
3461 channelCount++;
3462 }
3463 }
3464 ALOGV(" %s", chan_loc_tbl[j]);
3465 }
3466 }
3467 } else {
3468 if (br.numBitsLeft() == 0) {
3469 delete[] chunk;
3470 return ERROR_MALFORMED;
3471 }
3472 br.skipBits(1);
3473 }
3474 }
3475
3476 if (br.numBitsLeft() != 0) {
3477 if (br.numBitsLeft() < 8) {
3478 delete[] chunk;
3479 return ERROR_MALFORMED;
3480 }
3481 unsigned mask = br.getBits(8);
3482 for (unsigned i = 0; i < 8; i++) {
3483 if (((0x1 << i) && mask) == 0)
3484 continue;
3485
3486 if (br.numBitsLeft() < 8) {
3487 delete[] chunk;
3488 return ERROR_MALFORMED;
3489 }
3490 switch (i) {
3491 case 0: {
3492 unsigned complexity = br.getBits(8);
3493 ALOGV("Found a JOC stream with complexity = %d", complexity);
3494 }break;
3495 default: {
3496 br.skipBits(8);
3497 }break;
3498 }
3499 }
3500 }
3501 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_EAC3);
3502 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3503 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3504
3505 delete[] chunk;
3506 return OK;
3507 }
3508
parseAC3SpecificBox(off64_t offset)3509 status_t MPEG4Extractor::parseAC3SpecificBox(off64_t offset) {
3510 if (mLastTrack == NULL) {
3511 return ERROR_MALFORMED;
3512 }
3513
3514 uint16_t sampleRate, channels;
3515 status_t status;
3516 if ((status = parseChannelCountSampleRate(&offset, &channels, &sampleRate)) != OK) {
3517 return status;
3518 }
3519 uint32_t size;
3520 // + 4-byte size
3521 // + 4-byte type
3522 // + 3-byte payload
3523 const uint32_t kAC3SpecificBoxSize = 11;
3524 if (!mDataSource->getUInt32(offset, &size) || size < kAC3SpecificBoxSize) {
3525 ALOGE("MPEG4Extractor: error while reading ac-3 block: cannot read specific box size");
3526 return ERROR_MALFORMED;
3527 }
3528
3529 offset += 4;
3530 uint32_t type;
3531 if (!mDataSource->getUInt32(offset, &type) || type != FOURCC("dac3")) {
3532 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: header not dac3");
3533 return ERROR_MALFORMED;
3534 }
3535
3536 offset += 4;
3537 const uint32_t kAC3SpecificBoxPayloadSize = 3;
3538 uint8_t chunk[kAC3SpecificBoxPayloadSize];
3539 if (mDataSource->readAt(offset, chunk, sizeof(chunk)) != sizeof(chunk)) {
3540 ALOGE("MPEG4Extractor: error while reading ac-3 specific block: bitstream fields");
3541 return ERROR_MALFORMED;
3542 }
3543
3544 ABitReader br(chunk, sizeof(chunk));
3545 static const unsigned channelCountTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
3546 static const unsigned sampleRateTable[] = {48000, 44100, 32000};
3547
3548 unsigned fscod = br.getBits(2);
3549 if (fscod == 3) {
3550 ALOGE("Incorrect fscod (3) in AC3 header");
3551 return ERROR_MALFORMED;
3552 }
3553 unsigned boxSampleRate = sampleRateTable[fscod];
3554 if (boxSampleRate != sampleRate) {
3555 ALOGE("sample rate mismatch: boxSampleRate = %d, sampleRate = %d",
3556 boxSampleRate, sampleRate);
3557 return ERROR_MALFORMED;
3558 }
3559
3560 unsigned bsid = br.getBits(5);
3561 if (bsid > 8) {
3562 ALOGW("Incorrect bsid in AC3 header. Possibly E-AC-3?");
3563 return ERROR_MALFORMED;
3564 }
3565
3566 // skip
3567 br.skipBits(3); // bsmod
3568
3569 unsigned acmod = br.getBits(3);
3570 unsigned lfeon = br.getBits(1);
3571 unsigned channelCount = channelCountTable[acmod] + lfeon;
3572
3573 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_AC3);
3574 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, channelCount);
3575 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
3576 return OK;
3577 }
3578
parseALACSampleEntry(off64_t * offset)3579 status_t MPEG4Extractor::parseALACSampleEntry(off64_t *offset) {
3580 // See 'external/alac/ALACMagicCookieDescription.txt for the detail'.
3581 // Store ALAC magic cookie (decoder needs it).
3582 uint8_t alacInfo[12];
3583 off64_t data_offset = *offset;
3584
3585 if (mDataSource->readAt(
3586 data_offset, alacInfo, sizeof(alacInfo)) < (ssize_t)sizeof(alacInfo)) {
3587 return ERROR_IO;
3588 }
3589 uint32_t size = U32_AT(&alacInfo[0]);
3590 if ((size != ALAC_SPECIFIC_INFO_SIZE) ||
3591 (U32_AT(&alacInfo[4]) != FOURCC("alac")) ||
3592 (U32_AT(&alacInfo[8]) != 0)) {
3593 ALOGV("Size:%u, U32_AT(&alacInfo[4]):%u, U32_AT(&alacInfo[8]):%u",
3594 size, U32_AT(&alacInfo[4]), U32_AT(&alacInfo[8]));
3595 return ERROR_MALFORMED;
3596 }
3597 data_offset += sizeof(alacInfo);
3598 uint8_t cookie[size - sizeof(alacInfo)];
3599 if (mDataSource->readAt(
3600 data_offset, cookie, sizeof(cookie)) < (ssize_t)sizeof(cookie)) {
3601 return ERROR_IO;
3602 }
3603
3604 uint8_t bitsPerSample = cookie[5];
3605 AMediaFormat_setInt32(mLastTrack->meta,
3606 AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, bitsPerSample);
3607 AMediaFormat_setInt32(mLastTrack->meta,
3608 AMEDIAFORMAT_KEY_CHANNEL_COUNT, cookie[9]);
3609 AMediaFormat_setInt32(mLastTrack->meta,
3610 AMEDIAFORMAT_KEY_SAMPLE_RATE, U32_AT(&cookie[20]));
3611 AMediaFormat_setBuffer(mLastTrack->meta,
3612 AMEDIAFORMAT_KEY_CSD_0, cookie, sizeof(cookie));
3613 data_offset += sizeof(cookie);
3614 *offset = data_offset;
3615 return OK;
3616 }
3617
parseSegmentIndex(off64_t offset,size_t size)3618 status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
3619 ALOGV("MPEG4Extractor::parseSegmentIndex");
3620
3621 if (size < 12) {
3622 return -EINVAL;
3623 }
3624
3625 uint32_t flags;
3626 if (!mDataSource->getUInt32(offset, &flags)) {
3627 return ERROR_MALFORMED;
3628 }
3629
3630 uint32_t version = flags >> 24;
3631 flags &= 0xffffff;
3632
3633 ALOGV("sidx version %d", version);
3634
3635 uint32_t referenceId;
3636 if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
3637 return ERROR_MALFORMED;
3638 }
3639
3640 uint32_t timeScale;
3641 if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
3642 return ERROR_MALFORMED;
3643 }
3644 ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
3645 if (timeScale == 0)
3646 return ERROR_MALFORMED;
3647
3648 uint64_t earliestPresentationTime;
3649 uint64_t firstOffset;
3650
3651 offset += 12;
3652 size -= 12;
3653
3654 if (version == 0) {
3655 if (size < 8) {
3656 return -EINVAL;
3657 }
3658 uint32_t tmp;
3659 if (!mDataSource->getUInt32(offset, &tmp)) {
3660 return ERROR_MALFORMED;
3661 }
3662 earliestPresentationTime = tmp;
3663 if (!mDataSource->getUInt32(offset + 4, &tmp)) {
3664 return ERROR_MALFORMED;
3665 }
3666 firstOffset = tmp;
3667 offset += 8;
3668 size -= 8;
3669 } else {
3670 if (size < 16) {
3671 return -EINVAL;
3672 }
3673 if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
3674 return ERROR_MALFORMED;
3675 }
3676 if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
3677 return ERROR_MALFORMED;
3678 }
3679 offset += 16;
3680 size -= 16;
3681 }
3682 ALOGV("sidx pres/off: %" PRIu64 "/%" PRIu64, earliestPresentationTime, firstOffset);
3683
3684 if (size < 4) {
3685 return -EINVAL;
3686 }
3687
3688 uint16_t referenceCount;
3689 if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
3690 return ERROR_MALFORMED;
3691 }
3692 offset += 4;
3693 size -= 4;
3694 ALOGV("refcount: %d", referenceCount);
3695
3696 if (size < referenceCount * 12) {
3697 return -EINVAL;
3698 }
3699
3700 uint64_t total_duration = 0;
3701 for (unsigned int i = 0; i < referenceCount; i++) {
3702 uint32_t d1, d2, d3;
3703
3704 if (!mDataSource->getUInt32(offset, &d1) || // size
3705 !mDataSource->getUInt32(offset + 4, &d2) || // duration
3706 !mDataSource->getUInt32(offset + 8, &d3)) { // flags
3707 return ERROR_MALFORMED;
3708 }
3709
3710 if (d1 & 0x80000000) {
3711 ALOGW("sub-sidx boxes not supported yet");
3712 }
3713 bool sap = d3 & 0x80000000;
3714 uint32_t saptype = (d3 >> 28) & 7;
3715 if (!sap || (saptype != 1 && saptype != 2)) {
3716 // type 1 and 2 are sync samples
3717 ALOGW("not a stream access point, or unsupported type: %08x", d3);
3718 }
3719 total_duration += d2;
3720 offset += 12;
3721 ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
3722 SidxEntry se;
3723 se.mSize = d1 & 0x7fffffff;
3724 se.mDurationUs = 1000000LL * d2 / timeScale;
3725 mSidxEntries.add(se);
3726 }
3727
3728 uint64_t sidxDuration = total_duration * 1000000 / timeScale;
3729
3730 if (mLastTrack == NULL)
3731 return ERROR_MALFORMED;
3732
3733 int64_t metaDuration;
3734 if (!AMediaFormat_getInt64(mLastTrack->meta,
3735 AMEDIAFORMAT_KEY_DURATION, &metaDuration) || metaDuration == 0) {
3736 AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, sidxDuration);
3737 }
3738 return OK;
3739 }
3740
parseQTMetaKey(off64_t offset,size_t size)3741 status_t MPEG4Extractor::parseQTMetaKey(off64_t offset, size_t size) {
3742 if (size < 8) {
3743 return ERROR_MALFORMED;
3744 }
3745
3746 uint32_t count;
3747 if (!mDataSource->getUInt32(offset + 4, &count)) {
3748 return ERROR_MALFORMED;
3749 }
3750
3751 if (mMetaKeyMap.size() > 0) {
3752 ALOGW("'keys' atom seen again, discarding existing entries");
3753 mMetaKeyMap.clear();
3754 }
3755
3756 off64_t keyOffset = offset + 8;
3757 off64_t stopOffset = offset + size;
3758 for (size_t i = 1; i <= count; i++) {
3759 if (keyOffset + 8 > stopOffset) {
3760 return ERROR_MALFORMED;
3761 }
3762
3763 uint32_t keySize;
3764 if (!mDataSource->getUInt32(keyOffset, &keySize)
3765 || keySize < 8
3766 || keyOffset + keySize > stopOffset) {
3767 return ERROR_MALFORMED;
3768 }
3769
3770 uint32_t type;
3771 if (!mDataSource->getUInt32(keyOffset + 4, &type)
3772 || type != FOURCC("mdta")) {
3773 return ERROR_MALFORMED;
3774 }
3775
3776 keySize -= 8;
3777 keyOffset += 8;
3778
3779 auto keyData = heapbuffer<uint8_t>(keySize);
3780 if (keyData.get() == NULL) {
3781 return ERROR_MALFORMED;
3782 }
3783 if (mDataSource->readAt(
3784 keyOffset, keyData.get(), keySize) < (ssize_t) keySize) {
3785 return ERROR_MALFORMED;
3786 }
3787
3788 AString key((const char *)keyData.get(), keySize);
3789 mMetaKeyMap.add(i, key);
3790
3791 keyOffset += keySize;
3792 }
3793 return OK;
3794 }
3795
parseQTMetaVal(int32_t keyId,off64_t offset,size_t size)3796 status_t MPEG4Extractor::parseQTMetaVal(
3797 int32_t keyId, off64_t offset, size_t size) {
3798 ssize_t index = mMetaKeyMap.indexOfKey(keyId);
3799 if (index < 0) {
3800 // corresponding key is not present, ignore
3801 return ERROR_MALFORMED;
3802 }
3803
3804 if (size <= 16) {
3805 return ERROR_MALFORMED;
3806 }
3807 uint32_t dataSize;
3808 if (!mDataSource->getUInt32(offset, &dataSize)
3809 || dataSize > size || dataSize <= 16) {
3810 return ERROR_MALFORMED;
3811 }
3812 uint32_t atomFourCC;
3813 if (!mDataSource->getUInt32(offset + 4, &atomFourCC)
3814 || atomFourCC != FOURCC("data")) {
3815 return ERROR_MALFORMED;
3816 }
3817 uint32_t dataType;
3818 if (!mDataSource->getUInt32(offset + 8, &dataType)
3819 || ((dataType & 0xff000000) != 0)) {
3820 // not well-known type
3821 return ERROR_MALFORMED;
3822 }
3823
3824 dataSize -= 16;
3825 offset += 16;
3826
3827 if (dataType == 23 && dataSize >= 4) {
3828 // BE Float32
3829 uint32_t val;
3830 if (!mDataSource->getUInt32(offset, &val)) {
3831 return ERROR_MALFORMED;
3832 }
3833 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
3834 AMediaFormat_setFloat(mFileMetaData, AMEDIAFORMAT_KEY_CAPTURE_RATE, *(float *)&val);
3835 }
3836 } else if (dataType == 67 && dataSize >= 4) {
3837 // BE signed int32
3838 uint32_t val;
3839 if (!mDataSource->getUInt32(offset, &val)) {
3840 return ERROR_MALFORMED;
3841 }
3842 if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) {
3843 AMediaFormat_setInt32(mFileMetaData,
3844 AMEDIAFORMAT_KEY_TEMPORAL_LAYER_COUNT, val);
3845 }
3846 } else {
3847 // add more keys if needed
3848 ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
3849 }
3850
3851 return OK;
3852 }
3853
parseTrackHeader(off64_t data_offset,off64_t data_size)3854 status_t MPEG4Extractor::parseTrackHeader(
3855 off64_t data_offset, off64_t data_size) {
3856 if (data_size < 4) {
3857 return ERROR_MALFORMED;
3858 }
3859
3860 uint8_t version;
3861 if (mDataSource->readAt(data_offset, &version, 1) < 1) {
3862 return ERROR_IO;
3863 }
3864
3865 size_t dynSize = (version == 1) ? 36 : 24;
3866
3867 uint8_t buffer[36 + 60];
3868
3869 if (data_size != (off64_t)dynSize + 60) {
3870 return ERROR_MALFORMED;
3871 }
3872
3873 if (mDataSource->readAt(
3874 data_offset, buffer, data_size) < (ssize_t)data_size) {
3875 return ERROR_IO;
3876 }
3877
3878 int32_t id;
3879
3880 if (version == 1) {
3881 // we can get ctime value from U64_AT(&buffer[4])
3882 // we can get mtime value from U64_AT(&buffer[12])
3883 id = U32_AT(&buffer[20]);
3884 // we can get duration value from U64_AT(&buffer[28])
3885 } else if (version == 0) {
3886 // we can get ctime value from U32_AT(&buffer[4])
3887 // we can get mtime value from U32_AT(&buffer[8])
3888 id = U32_AT(&buffer[12]);
3889 // we can get duration value from U32_AT(&buffer[20])
3890 } else {
3891 return ERROR_UNSUPPORTED;
3892 }
3893
3894 if (mLastTrack == NULL)
3895 return ERROR_MALFORMED;
3896
3897 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_TRACK_ID, id);
3898
3899 size_t matrixOffset = dynSize + 16;
3900 int32_t a00 = U32_AT(&buffer[matrixOffset]);
3901 int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
3902 int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
3903 int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
3904
3905 #if 0
3906 int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
3907 int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
3908
3909 ALOGI("x' = %.2f * x + %.2f * y + %.2f",
3910 a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
3911 ALOGI("y' = %.2f * x + %.2f * y + %.2f",
3912 a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
3913 #endif
3914
3915 uint32_t rotationDegrees;
3916
3917 static const int32_t kFixedOne = 0x10000;
3918 if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
3919 // Identity, no rotation
3920 rotationDegrees = 0;
3921 } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
3922 rotationDegrees = 90;
3923 } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
3924 rotationDegrees = 270;
3925 } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
3926 rotationDegrees = 180;
3927 } else {
3928 ALOGW("We only support 0,90,180,270 degree rotation matrices");
3929 rotationDegrees = 0;
3930 }
3931
3932 if (rotationDegrees != 0) {
3933 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_ROTATION, rotationDegrees);
3934 }
3935
3936 // Handle presentation display size, which could be different
3937 // from the image size indicated by AMEDIAFORMAT_KEY_WIDTH and AMEDIAFORMAT_KEY_HEIGHT.
3938 uint32_t width = U32_AT(&buffer[dynSize + 52]);
3939 uint32_t height = U32_AT(&buffer[dynSize + 56]);
3940 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_WIDTH, width >> 16);
3941 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_DISPLAY_HEIGHT, height >> 16);
3942
3943 return OK;
3944 }
3945
parseITunesMetaData(off64_t offset,size_t size)3946 status_t MPEG4Extractor::parseITunesMetaData(off64_t offset, size_t size) {
3947 if (size == 0) {
3948 return OK;
3949 }
3950
3951 if (size < 4 || size == SIZE_MAX) {
3952 return ERROR_MALFORMED;
3953 }
3954
3955 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
3956 if (buffer == NULL) {
3957 return ERROR_MALFORMED;
3958 }
3959 if (mDataSource->readAt(
3960 offset, buffer, size) != (ssize_t)size) {
3961 delete[] buffer;
3962 buffer = NULL;
3963
3964 return ERROR_IO;
3965 }
3966
3967 uint32_t flags = U32_AT(buffer);
3968
3969 const char *metadataKey = nullptr;
3970 char chunk[5];
3971 MakeFourCCString(mPath[4], chunk);
3972 ALOGV("meta: %s @ %lld", chunk, (long long)offset);
3973 switch ((int32_t)mPath[4]) {
3974 case FOURCC("\251alb"):
3975 {
3976 metadataKey = AMEDIAFORMAT_KEY_ALBUM;
3977 break;
3978 }
3979 case FOURCC("\251ART"):
3980 {
3981 metadataKey = AMEDIAFORMAT_KEY_ARTIST;
3982 break;
3983 }
3984 case FOURCC("aART"):
3985 {
3986 metadataKey = AMEDIAFORMAT_KEY_ALBUMARTIST;
3987 break;
3988 }
3989 case FOURCC("\251day"):
3990 {
3991 metadataKey = AMEDIAFORMAT_KEY_YEAR;
3992 break;
3993 }
3994 case FOURCC("\251nam"):
3995 {
3996 metadataKey = AMEDIAFORMAT_KEY_TITLE;
3997 break;
3998 }
3999 case FOURCC("\251wrt"):
4000 {
4001 // various open source taggers agree that the "©wrt" tag is for composer, not writer
4002 metadataKey = AMEDIAFORMAT_KEY_COMPOSER;
4003 break;
4004 }
4005 case FOURCC("covr"):
4006 {
4007 metadataKey = AMEDIAFORMAT_KEY_ALBUMART;
4008 break;
4009 }
4010 case FOURCC("gnre"):
4011 case FOURCC("\251gen"):
4012 {
4013 metadataKey = AMEDIAFORMAT_KEY_GENRE;
4014 break;
4015 }
4016 case FOURCC("cpil"):
4017 {
4018 if (size == 9 && flags == 21) {
4019 char tmp[16];
4020 sprintf(tmp, "%d",
4021 (int)buffer[size - 1]);
4022
4023 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_COMPILATION, tmp);
4024 }
4025 break;
4026 }
4027 case FOURCC("trkn"):
4028 {
4029 if (size == 16 && flags == 0) {
4030 char tmp[16];
4031 uint16_t* pTrack = (uint16_t*)&buffer[10];
4032 uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
4033 sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
4034
4035 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
4036 }
4037 break;
4038 }
4039 case FOURCC("disk"):
4040 {
4041 if ((size == 14 || size == 16) && flags == 0) {
4042 char tmp[16];
4043 uint16_t* pDisc = (uint16_t*)&buffer[10];
4044 uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
4045 sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
4046
4047 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DISCNUMBER, tmp);
4048 }
4049 break;
4050 }
4051 case FOURCC("----"):
4052 {
4053 buffer[size] = '\0';
4054 switch (mPath[5]) {
4055 case FOURCC("mean"):
4056 mLastCommentMean.setTo((const char *)buffer + 4);
4057 break;
4058 case FOURCC("name"):
4059 mLastCommentName.setTo((const char *)buffer + 4);
4060 break;
4061 case FOURCC("data"):
4062 if (size < 8) {
4063 delete[] buffer;
4064 buffer = NULL;
4065 ALOGE("b/24346430");
4066 return ERROR_MALFORMED;
4067 }
4068 mLastCommentData.setTo((const char *)buffer + 8);
4069 break;
4070 }
4071
4072 // Once we have a set of mean/name/data info, go ahead and process
4073 // it to see if its something we are interested in. Whether or not
4074 // were are interested in the specific tag, make sure to clear out
4075 // the set so we can be ready to process another tuple should one
4076 // show up later in the file.
4077 if ((mLastCommentMean.length() != 0) &&
4078 (mLastCommentName.length() != 0) &&
4079 (mLastCommentData.length() != 0)) {
4080
4081 if (mLastCommentMean == "com.apple.iTunes"
4082 && mLastCommentName == "iTunSMPB") {
4083 int32_t delay, padding;
4084 if (sscanf(mLastCommentData,
4085 " %*x %x %x %*x", &delay, &padding) == 2) {
4086 if (mLastTrack == NULL) {
4087 delete[] buffer;
4088 return ERROR_MALFORMED;
4089 }
4090
4091 AMediaFormat_setInt32(mLastTrack->meta,
4092 AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
4093 AMediaFormat_setInt32(mLastTrack->meta,
4094 AMEDIAFORMAT_KEY_ENCODER_PADDING, padding);
4095 }
4096 }
4097
4098 mLastCommentMean.clear();
4099 mLastCommentName.clear();
4100 mLastCommentData.clear();
4101 }
4102 break;
4103 }
4104
4105 default:
4106 break;
4107 }
4108
4109 void *tmpData;
4110 size_t tmpDataSize;
4111 const char *s;
4112 if (size >= 8 && metadataKey &&
4113 !AMediaFormat_getBuffer(mFileMetaData, metadataKey, &tmpData, &tmpDataSize) &&
4114 !AMediaFormat_getString(mFileMetaData, metadataKey, &s)) {
4115 if (!strcmp(metadataKey, "albumart")) {
4116 AMediaFormat_setBuffer(mFileMetaData, metadataKey,
4117 buffer + 8, size - 8);
4118 } else if (!strcmp(metadataKey, AMEDIAFORMAT_KEY_GENRE)) {
4119 if (flags == 0) {
4120 // uint8_t genre code, iTunes genre codes are
4121 // the standard id3 codes, except they start
4122 // at 1 instead of 0 (e.g. Pop is 14, not 13)
4123 // We use standard id3 numbering, so subtract 1.
4124 int genrecode = (int)buffer[size - 1];
4125 genrecode--;
4126 if (genrecode < 0) {
4127 genrecode = 255; // reserved for 'unknown genre'
4128 }
4129 char genre[10];
4130 sprintf(genre, "%d", genrecode);
4131
4132 AMediaFormat_setString(mFileMetaData, metadataKey, genre);
4133 } else if (flags == 1) {
4134 // custom genre string
4135 buffer[size] = '\0';
4136
4137 AMediaFormat_setString(mFileMetaData,
4138 metadataKey, (const char *)buffer + 8);
4139 }
4140 } else {
4141 buffer[size] = '\0';
4142
4143 AMediaFormat_setString(mFileMetaData,
4144 metadataKey, (const char *)buffer + 8);
4145 }
4146 }
4147
4148 delete[] buffer;
4149 buffer = NULL;
4150
4151 return OK;
4152 }
4153
parseColorInfo(off64_t offset,size_t size)4154 status_t MPEG4Extractor::parseColorInfo(off64_t offset, size_t size) {
4155 if (size < 4 || size == SIZE_MAX || mLastTrack == NULL) {
4156 return ERROR_MALFORMED;
4157 }
4158
4159 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
4160 if (buffer == NULL) {
4161 return ERROR_MALFORMED;
4162 }
4163 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
4164 delete[] buffer;
4165 buffer = NULL;
4166
4167 return ERROR_IO;
4168 }
4169
4170 int32_t type = U32_AT(&buffer[0]);
4171 if ((type == FOURCC("nclx") && size >= 11)
4172 || (type == FOURCC("nclc") && size >= 10)) {
4173 // only store the first color specification
4174 int32_t existingColor;
4175 if (!AMediaFormat_getInt32(mLastTrack->meta,
4176 AMEDIAFORMAT_KEY_COLOR_RANGE, &existingColor)) {
4177 int32_t primaries = U16_AT(&buffer[4]);
4178 int32_t isotransfer = U16_AT(&buffer[6]);
4179 int32_t coeffs = U16_AT(&buffer[8]);
4180 bool fullRange = (type == FOURCC("nclx")) && (buffer[10] & 128);
4181
4182 int32_t range = 0;
4183 int32_t standard = 0;
4184 int32_t transfer = 0;
4185 ColorUtils::convertIsoColorAspectsToPlatformAspects(
4186 primaries, isotransfer, coeffs, fullRange,
4187 &range, &standard, &transfer);
4188
4189 if (range != 0) {
4190 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_RANGE, range);
4191 }
4192 if (standard != 0) {
4193 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_STANDARD, standard);
4194 }
4195 if (transfer != 0) {
4196 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_COLOR_TRANSFER, transfer);
4197 }
4198 }
4199 }
4200
4201 delete[] buffer;
4202 buffer = NULL;
4203
4204 return OK;
4205 }
4206
parsePaspBox(off64_t offset,size_t size)4207 status_t MPEG4Extractor::parsePaspBox(off64_t offset, size_t size) {
4208 if (size < 8 || size == SIZE_MAX || mLastTrack == NULL) {
4209 return ERROR_MALFORMED;
4210 }
4211
4212 uint32_t data[2]; // hSpacing, vSpacing
4213 if (mDataSource->readAt(offset, data, 8) < 8) {
4214 return ERROR_IO;
4215 }
4216 uint32_t hSpacing = ntohl(data[0]);
4217 uint32_t vSpacing = ntohl(data[1]);
4218
4219 if (hSpacing != 0 && vSpacing != 0) {
4220 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAR_WIDTH, hSpacing);
4221 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAR_HEIGHT, vSpacing);
4222 }
4223
4224 return OK;
4225 }
4226
parse3GPPMetaData(off64_t offset,size_t size,int depth)4227 status_t MPEG4Extractor::parse3GPPMetaData(off64_t offset, size_t size, int depth) {
4228 if (size < 4 || size == SIZE_MAX) {
4229 return ERROR_MALFORMED;
4230 }
4231
4232 uint8_t *buffer = new (std::nothrow) uint8_t[size + 1];
4233 if (buffer == NULL) {
4234 return ERROR_MALFORMED;
4235 }
4236 if (mDataSource->readAt(
4237 offset, buffer, size) != (ssize_t)size) {
4238 delete[] buffer;
4239 buffer = NULL;
4240
4241 return ERROR_IO;
4242 }
4243
4244 const char *metadataKey = nullptr;
4245 switch (mPath[depth]) {
4246 case FOURCC("titl"):
4247 {
4248 metadataKey = "title";
4249 break;
4250 }
4251 case FOURCC("perf"):
4252 {
4253 metadataKey = "artist";
4254 break;
4255 }
4256 case FOURCC("auth"):
4257 {
4258 metadataKey = "writer";
4259 break;
4260 }
4261 case FOURCC("gnre"):
4262 {
4263 metadataKey = "genre";
4264 break;
4265 }
4266 case FOURCC("albm"):
4267 {
4268 if (buffer[size - 1] != '\0') {
4269 char tmp[4];
4270 sprintf(tmp, "%u", buffer[size - 1]);
4271
4272 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_CDTRACKNUMBER, tmp);
4273 }
4274
4275 metadataKey = "album";
4276 break;
4277 }
4278 case FOURCC("yrrc"):
4279 {
4280 if (size < 6) {
4281 delete[] buffer;
4282 buffer = NULL;
4283 ALOGE("b/62133227");
4284 android_errorWriteLog(0x534e4554, "62133227");
4285 return ERROR_MALFORMED;
4286 }
4287 char tmp[5];
4288 uint16_t year = U16_AT(&buffer[4]);
4289
4290 if (year < 10000) {
4291 sprintf(tmp, "%u", year);
4292
4293 AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_YEAR, tmp);
4294 }
4295 break;
4296 }
4297
4298 default:
4299 break;
4300 }
4301
4302 if (metadataKey) {
4303 bool isUTF8 = true; // Common case
4304 char16_t *framedata = NULL;
4305 int len16 = 0; // Number of UTF-16 characters
4306
4307 // smallest possible valid UTF-16 string w BOM: 0xfe 0xff 0x00 0x00
4308 if (size < 6) {
4309 delete[] buffer;
4310 buffer = NULL;
4311 return ERROR_MALFORMED;
4312 }
4313
4314 if (size - 6 >= 4) {
4315 len16 = ((size - 6) / 2) - 1; // don't include 0x0000 terminator
4316 framedata = (char16_t *)(buffer + 6);
4317 if (0xfffe == *framedata) {
4318 // endianness marker (BOM) doesn't match host endianness
4319 for (int i = 0; i < len16; i++) {
4320 framedata[i] = bswap_16(framedata[i]);
4321 }
4322 // BOM is now swapped to 0xfeff, we will execute next block too
4323 }
4324
4325 if (0xfeff == *framedata) {
4326 // Remove the BOM
4327 framedata++;
4328 len16--;
4329 isUTF8 = false;
4330 }
4331 // else normal non-zero-length UTF-8 string
4332 // we can't handle UTF-16 without BOM as there is no other
4333 // indication of encoding.
4334 }
4335
4336 if (isUTF8) {
4337 buffer[size] = 0;
4338 AMediaFormat_setString(mFileMetaData, metadataKey, (const char *)buffer + 6);
4339 } else {
4340 // Convert from UTF-16 string to UTF-8 string.
4341 String8 tmpUTF8str(framedata, len16);
4342 AMediaFormat_setString(mFileMetaData, metadataKey, tmpUTF8str.string());
4343 }
4344 }
4345
4346 delete[] buffer;
4347 buffer = NULL;
4348
4349 return OK;
4350 }
4351
parseID3v2MetaData(off64_t offset,uint64_t size)4352 void MPEG4Extractor::parseID3v2MetaData(off64_t offset, uint64_t size) {
4353 uint8_t *buffer = new (std::nothrow) uint8_t[size];
4354 if (buffer == NULL) {
4355 return;
4356 }
4357 if (mDataSource->readAt(offset, buffer, size) != (ssize_t)size) {
4358 delete[] buffer;
4359 buffer = NULL;
4360 return;
4361 }
4362
4363 ID3 id3(buffer, size, true /* ignorev1 */);
4364 delete[] buffer;
4365
4366 if (id3.isValid()) {
4367 struct Map {
4368 const char *key;
4369 const char *tag1;
4370 const char *tag2;
4371 };
4372 static const Map kMap[] = {
4373 { AMEDIAFORMAT_KEY_ALBUM, "TALB", "TAL" },
4374 { AMEDIAFORMAT_KEY_ARTIST, "TPE1", "TP1" },
4375 { AMEDIAFORMAT_KEY_ALBUMARTIST, "TPE2", "TP2" },
4376 { AMEDIAFORMAT_KEY_COMPOSER, "TCOM", "TCM" },
4377 { AMEDIAFORMAT_KEY_GENRE, "TCON", "TCO" },
4378 { AMEDIAFORMAT_KEY_TITLE, "TIT2", "TT2" },
4379 { AMEDIAFORMAT_KEY_YEAR, "TYE", "TYER" },
4380 { AMEDIAFORMAT_KEY_AUTHOR, "TXT", "TEXT" },
4381 { AMEDIAFORMAT_KEY_CDTRACKNUMBER, "TRK", "TRCK" },
4382 { AMEDIAFORMAT_KEY_DISCNUMBER, "TPA", "TPOS" },
4383 { AMEDIAFORMAT_KEY_COMPILATION, "TCP", "TCMP" },
4384 };
4385 static const size_t kNumMapEntries = sizeof(kMap) / sizeof(kMap[0]);
4386
4387 for (size_t i = 0; i < kNumMapEntries; ++i) {
4388 const char *ss;
4389 if (!AMediaFormat_getString(mFileMetaData, kMap[i].key, &ss)) {
4390 ID3::Iterator *it = new ID3::Iterator(id3, kMap[i].tag1);
4391 if (it->done()) {
4392 delete it;
4393 it = new ID3::Iterator(id3, kMap[i].tag2);
4394 }
4395
4396 if (it->done()) {
4397 delete it;
4398 continue;
4399 }
4400
4401 String8 s;
4402 it->getString(&s);
4403 delete it;
4404
4405 AMediaFormat_setString(mFileMetaData, kMap[i].key, s);
4406 }
4407 }
4408
4409 size_t dataSize;
4410 String8 mime;
4411 const void *data = id3.getAlbumArt(&dataSize, &mime);
4412
4413 if (data) {
4414 AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_ALBUMART, data, dataSize);
4415 }
4416 }
4417 }
4418
getTrack(size_t index)4419 MediaTrackHelper *MPEG4Extractor::getTrack(size_t index) {
4420 status_t err;
4421 if ((err = readMetaData()) != OK) {
4422 return NULL;
4423 }
4424
4425 Track *track = mFirstTrack;
4426 while (index > 0) {
4427 if (track == NULL) {
4428 return NULL;
4429 }
4430
4431 track = track->next;
4432 --index;
4433 }
4434
4435 if (track == NULL) {
4436 return NULL;
4437 }
4438
4439
4440 Trex *trex = NULL;
4441 int32_t trackId;
4442 if (AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
4443 for (size_t i = 0; i < mTrex.size(); i++) {
4444 Trex *t = &mTrex.editItemAt(i);
4445 if (t->track_ID == (uint32_t) trackId) {
4446 trex = t;
4447 break;
4448 }
4449 }
4450 } else {
4451 ALOGE("b/21657957");
4452 return NULL;
4453 }
4454
4455 ALOGV("getTrack called, pssh: %zu", mPssh.size());
4456
4457 const char *mime;
4458 if (!AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)) {
4459 return NULL;
4460 }
4461
4462 sp<ItemTable> itemTable;
4463 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4464 void *data;
4465 size_t size;
4466 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4467 return NULL;
4468 }
4469
4470 const uint8_t *ptr = (const uint8_t *)data;
4471
4472 if (size < 7 || ptr[0] != 1) { // configurationVersion == 1
4473 return NULL;
4474 }
4475 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
4476 || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4477 void *data;
4478 size_t size;
4479 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4480 return NULL;
4481 }
4482
4483 const uint8_t *ptr = (const uint8_t *)data;
4484
4485 if (size < 22 || ptr[0] != 1) { // configurationVersion == 1
4486 return NULL;
4487 }
4488 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
4489 itemTable = mItemTable;
4490 }
4491 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
4492 void *data;
4493 size_t size;
4494 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)) {
4495 return NULL;
4496 }
4497
4498 const uint8_t *ptr = (const uint8_t *)data;
4499
4500 // dv_major.dv_minor Should be 1.0 or 2.1
4501 if (size != 24 || ((ptr[0] != 1 || ptr[1] != 0) && (ptr[0] != 2 || ptr[1] != 1))) {
4502 return NULL;
4503 }
4504 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)
4505 || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF)) {
4506 void *data;
4507 size_t size;
4508 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4509 return NULL;
4510 }
4511
4512 const uint8_t *ptr = (const uint8_t *)data;
4513
4514 if (size < 4 || ptr[0] != 0x81) { // configurationVersion == 1
4515 return NULL;
4516 }
4517 if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF)) {
4518 itemTable = mItemTable;
4519 }
4520 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_VP9)) {
4521 void *data;
4522 size_t size;
4523 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4524 return NULL;
4525 }
4526
4527 const uint8_t *ptr = (const uint8_t *)data;
4528
4529 if (size < 5 || ptr[0] != 0x01) { // configurationVersion == 1
4530 return NULL;
4531 }
4532 }
4533
4534 ALOGV("track->elst_shift_start_ticks :%" PRIu64, track->elst_shift_start_ticks);
4535
4536 uint64_t elst_initial_empty_edit_ticks = 0;
4537 if (mHeaderTimescale != 0) {
4538 // Convert empty_edit_ticks from movie timescale to media timescale.
4539 uint64_t elst_initial_empty_edit_ticks_mul = 0, elst_initial_empty_edit_ticks_add = 0;
4540 if (__builtin_mul_overflow(track->elst_initial_empty_edit_ticks, track->timescale,
4541 &elst_initial_empty_edit_ticks_mul) ||
4542 __builtin_add_overflow(elst_initial_empty_edit_ticks_mul, (mHeaderTimescale / 2),
4543 &elst_initial_empty_edit_ticks_add)) {
4544 ALOGE("track->elst_initial_empty_edit_ticks overflow");
4545 return nullptr;
4546 }
4547 elst_initial_empty_edit_ticks = elst_initial_empty_edit_ticks_add / mHeaderTimescale;
4548 }
4549 ALOGV("elst_initial_empty_edit_ticks in MediaTimeScale :%" PRIu64,
4550 elst_initial_empty_edit_ticks);
4551
4552 MPEG4Source* source =
4553 new MPEG4Source(track->meta, mDataSource, track->timescale, track->sampleTable,
4554 mSidxEntries, trex, mMoofOffset, itemTable,
4555 track->elst_shift_start_ticks, elst_initial_empty_edit_ticks);
4556 if (source->init() != OK) {
4557 delete source;
4558 return NULL;
4559 }
4560 return source;
4561 }
4562
4563 // static
verifyTrack(Track * track)4564 status_t MPEG4Extractor::verifyTrack(Track *track) {
4565 const char *mime;
4566 CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
4567
4568 void *data;
4569 size_t size;
4570 if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
4571 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size)) {
4572 return ERROR_MALFORMED;
4573 }
4574 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
4575 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
4576 return ERROR_MALFORMED;
4577 }
4578 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
4579 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)) {
4580 return ERROR_MALFORMED;
4581 }
4582 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
4583 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4584 return ERROR_MALFORMED;
4585 }
4586 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_VP9)) {
4587 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
4588 return ERROR_MALFORMED;
4589 }
4590 } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
4591 || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)
4592 || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
4593 if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_ESDS, &data, &size)) {
4594 return ERROR_MALFORMED;
4595 }
4596 }
4597
4598 if (track->sampleTable == NULL || !track->sampleTable->isValid()) {
4599 // Make sure we have all the metadata we need.
4600 ALOGE("stbl atom missing/invalid.");
4601 return ERROR_MALFORMED;
4602 }
4603
4604 if (track->timescale == 0) {
4605 ALOGE("timescale invalid.");
4606 return ERROR_MALFORMED;
4607 }
4608
4609 return OK;
4610 }
4611
// Audio object type (AOT) codes used when interpreting AAC codec-specific
// data. Only the values this file refers to are active; the remaining codes
// are kept as comments for reference.
typedef enum {
    // AOT_NONE             = -1,
    // AOT_NULL_OBJECT      = 0,
    // AOT_AAC_MAIN         = 1,   // Main profile
    AOT_AAC_LC              = 2,   // Low Complexity object
    // AOT_AAC_SSR          = 3,
    // AOT_AAC_LTP          = 4,
    AOT_SBR                 = 5,
    // AOT_AAC_SCAL         = 6,
    // AOT_TWIN_VQ          = 7,
    // AOT_CELP             = 8,
    // AOT_HVXC             = 9,
    // AOT_RSVD_10          = 10,  // (reserved)
    // AOT_RSVD_11          = 11,  // (reserved)
    // AOT_TTSI             = 12,  // TTSI Object
    // AOT_MAIN_SYNTH       = 13,  // Main Synthetic object
    // AOT_WAV_TAB_SYNTH    = 14,  // Wavetable Synthesis object
    // AOT_GEN_MIDI         = 15,  // General MIDI object
    // AOT_ALG_SYNTH_AUD_FX = 16,  // Algorithmic Synthesis and Audio FX object
    AOT_ER_AAC_LC           = 17,  // Error Resilient(ER) AAC Low Complexity
    // AOT_RSVD_18          = 18,  // (reserved)
    // AOT_ER_AAC_LTP       = 19,  // Error Resilient(ER) AAC LTP object
    AOT_ER_AAC_SCAL         = 20,  // Error Resilient(ER) AAC Scalable object
    // AOT_ER_TWIN_VQ       = 21,  // Error Resilient(ER) TwinVQ object
    AOT_ER_BSAC             = 22,  // Error Resilient(ER) BSAC object
    AOT_ER_AAC_LD           = 23,  // Error Resilient(ER) AAC LowDelay object
    // AOT_ER_CELP          = 24,  // Error Resilient(ER) CELP object
    // AOT_ER_HVXC          = 25,  // Error Resilient(ER) HVXC object
    // AOT_ER_HILN          = 26,  // Error Resilient(ER) HILN object
    // AOT_ER_PARA          = 27,  // Error Resilient(ER) Parametric object
    // AOT_RSVD_28          = 28,  // might become SSC
    AOT_PS                  = 29,  // PS, Parametric Stereo (includes SBR)
    // AOT_MPEGS            = 30,  // MPEG Surround

    AOT_ESCAPE              = 31,  // Signal AOT uses more than 5 bits

    // AOT_MP3ONMP4_L1      = 32,  // MPEG-Layer1 in mp4
    // AOT_MP3ONMP4_L2      = 33,  // MPEG-Layer2 in mp4
    // AOT_MP3ONMP4_L3      = 34,  // MPEG-Layer3 in mp4
    // AOT_RSVD_35          = 35,  // might become DST
    // AOT_RSVD_36          = 36,  // might become ALS
    // AOT_AAC_SLS          = 37,  // AAC + SLS
    // AOT_SLS              = 38,  // SLS
    // AOT_ER_AAC_ELD       = 39,  // AAC Enhanced Low Delay

    AOT_USAC                = 42,  // USAC
    // AOT_SAOC             = 43,  // SAOC
    // AOT_LD_MPEGS         = 44,  // Low Delay MPEG Surround

    // AOT_RSVD50           = 50,  // Interim AOT for Rsvd50
} AUDIO_OBJECT_TYPE;
4663
updateAudioTrackInfoFromESDS_MPEG4Audio(const void * esds_data,size_t esds_size)4664 status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
4665 const void *esds_data, size_t esds_size) {
4666 ESDS esds(esds_data, esds_size);
4667
4668 uint8_t objectTypeIndication;
4669 if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
4670 return ERROR_MALFORMED;
4671 }
4672
4673 if (objectTypeIndication == 0xe1) {
4674 // This isn't MPEG4 audio at all, it's QCELP 14k...
4675 if (mLastTrack == NULL)
4676 return ERROR_MALFORMED;
4677
4678 AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_QCELP);
4679 return OK;
4680 }
4681
4682 if (objectTypeIndication == 0x6B || objectTypeIndication == 0x69) {
4683 // mp3 audio
4684 if (mLastTrack == NULL)
4685 return ERROR_MALFORMED;
4686
4687 AMediaFormat_setString(mLastTrack->meta,AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_MPEG);
4688 return OK;
4689 }
4690
4691 if (mLastTrack != NULL) {
4692 uint32_t maxBitrate = 0;
4693 uint32_t avgBitrate = 0;
4694 esds.getBitRate(&maxBitrate, &avgBitrate);
4695 if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
4696 AMediaFormat_setInt32(mLastTrack->meta,
4697 AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
4698 }
4699 if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
4700 AMediaFormat_setInt32(mLastTrack->meta,
4701 AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
4702 }
4703 }
4704
4705 const uint8_t *csd;
4706 size_t csd_size;
4707 if (esds.getCodecSpecificInfo(
4708 (const void **)&csd, &csd_size) != OK) {
4709 return ERROR_MALFORMED;
4710 }
4711
4712 if (kUseHexDump) {
4713 printf("ESD of size %zu\n", csd_size);
4714 hexdump(csd, csd_size);
4715 }
4716
4717 if (csd_size == 0) {
4718 // There's no further information, i.e. no codec specific data
4719 // Let's assume that the information provided in the mpeg4 headers
4720 // is accurate and hope for the best.
4721
4722 return OK;
4723 }
4724
4725 if (csd_size < 2) {
4726 return ERROR_MALFORMED;
4727 }
4728
4729 if (objectTypeIndication == 0xdd) {
4730 // vorbis audio
4731 if (csd[0] != 0x02) {
4732 return ERROR_MALFORMED;
4733 }
4734
4735 // codecInfo starts with two lengths, len1 and len2, that are
4736 // "Xiph-style-lacing encoded"..
4737
4738 size_t offset = 1;
4739 size_t len1 = 0;
4740 while (offset < csd_size && csd[offset] == 0xff) {
4741 if (__builtin_add_overflow(len1, 0xff, &len1)) {
4742 return ERROR_MALFORMED;
4743 }
4744 ++offset;
4745 }
4746 if (offset >= csd_size) {
4747 return ERROR_MALFORMED;
4748 }
4749 if (__builtin_add_overflow(len1, csd[offset], &len1)) {
4750 return ERROR_MALFORMED;
4751 }
4752 ++offset;
4753 if (len1 == 0) {
4754 return ERROR_MALFORMED;
4755 }
4756
4757 size_t len2 = 0;
4758 while (offset < csd_size && csd[offset] == 0xff) {
4759 if (__builtin_add_overflow(len2, 0xff, &len2)) {
4760 return ERROR_MALFORMED;
4761 }
4762 ++offset;
4763 }
4764 if (offset >= csd_size) {
4765 return ERROR_MALFORMED;
4766 }
4767 if (__builtin_add_overflow(len2, csd[offset], &len2)) {
4768 return ERROR_MALFORMED;
4769 }
4770 ++offset;
4771 if (len2 == 0) {
4772 return ERROR_MALFORMED;
4773 }
4774 if (offset >= csd_size || csd[offset] != 0x01) {
4775 return ERROR_MALFORMED;
4776 }
4777
4778 if (mLastTrack == NULL) {
4779 return ERROR_MALFORMED;
4780 }
4781 // formerly kKeyVorbisInfo
4782 AMediaFormat_setBuffer(mLastTrack->meta,
4783 AMEDIAFORMAT_KEY_CSD_0, &csd[offset], len1);
4784
4785 if (__builtin_add_overflow(offset, len1, &offset) ||
4786 offset >= csd_size || csd[offset] != 0x03) {
4787 return ERROR_MALFORMED;
4788 }
4789
4790 if (__builtin_add_overflow(offset, len2, &offset) ||
4791 offset >= csd_size || csd[offset] != 0x05) {
4792 return ERROR_MALFORMED;
4793 }
4794
4795 // formerly kKeyVorbisBooks
4796 AMediaFormat_setBuffer(mLastTrack->meta,
4797 AMEDIAFORMAT_KEY_CSD_1, &csd[offset], csd_size - offset);
4798 AMediaFormat_setString(mLastTrack->meta,
4799 AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_AUDIO_VORBIS);
4800
4801 return OK;
4802 }
4803
4804 static uint32_t kSamplingRate[] = {
4805 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
4806 16000, 12000, 11025, 8000, 7350
4807 };
4808
4809 ABitReader br(csd, csd_size);
4810 uint32_t objectType = br.getBits(5);
4811
4812 if (objectType == AOT_ESCAPE) { // AAC-ELD => additional 6 bits
4813 objectType = 32 + br.getBits(6);
4814 }
4815
4816 if (mLastTrack == NULL)
4817 return ERROR_MALFORMED;
4818
4819 //keep AOT type
4820 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_AAC_PROFILE, objectType);
4821
4822 uint32_t freqIndex = br.getBits(4);
4823
4824 int32_t sampleRate = 0;
4825 int32_t numChannels = 0;
4826 if (freqIndex == 15) {
4827 if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
4828 sampleRate = br.getBits(24);
4829 numChannels = br.getBits(4);
4830 } else {
4831 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4832 numChannels = br.getBits(4);
4833
4834 if (freqIndex == 13 || freqIndex == 14) {
4835 return ERROR_MALFORMED;
4836 }
4837
4838 sampleRate = kSamplingRate[freqIndex];
4839 }
4840
4841 if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 tbl 1.13
4842 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4843 uint32_t extFreqIndex = br.getBits(4);
4844 if (extFreqIndex == 15) {
4845 if (csd_size < 8) {
4846 return ERROR_MALFORMED;
4847 }
4848 if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
4849 br.skipBits(24); // extSampleRate
4850 } else {
4851 if (extFreqIndex == 13 || extFreqIndex == 14) {
4852 return ERROR_MALFORMED;
4853 }
4854 //extSampleRate = kSamplingRate[extFreqIndex];
4855 }
4856 //TODO: save the extension sampling rate value in meta data =>
4857 // AMediaFormat_setInt32(mLastTrack->meta, kKeyExtSampleRate, extSampleRate);
4858 }
4859
4860 switch (numChannels) {
4861 // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
4862 case 0:
4863 case 1:// FC
4864 case 2:// FL FR
4865 case 3:// FC, FL FR
4866 case 4:// FC, FL FR, RC
4867 case 5:// FC, FL FR, SL SR
4868 case 6:// FC, FL FR, SL SR, LFE
4869 //numChannels already contains the right value
4870 break;
4871 case 11:// FC, FL FR, SL SR, RC, LFE
4872 numChannels = 7;
4873 break;
4874 case 7: // FC, FCL FCR, FL FR, SL SR, LFE
4875 case 12:// FC, FL FR, SL SR, RL RR, LFE
4876 case 14:// FC, FL FR, SL SR, LFE, FHL FHR
4877 numChannels = 8;
4878 break;
4879 default:
4880 return ERROR_UNSUPPORTED;
4881 }
4882
4883 {
4884 if (objectType == AOT_SBR || objectType == AOT_PS) {
4885 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4886 objectType = br.getBits(5);
4887
4888 if (objectType == AOT_ESCAPE) {
4889 if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
4890 objectType = 32 + br.getBits(6);
4891 }
4892 }
4893 if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
4894 objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
4895 objectType == AOT_ER_BSAC) {
4896 if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
4897 br.skipBits(1); // frameLengthFlag
4898
4899 const int32_t dependsOnCoreCoder = br.getBits(1);
4900
4901 if (dependsOnCoreCoder ) {
4902 if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
4903 br.skipBits(14); // coreCoderDelay
4904 }
4905
4906 int32_t extensionFlag = -1;
4907 if (br.numBitsLeft() > 0) {
4908 extensionFlag = br.getBits(1);
4909 } else {
4910 switch (objectType) {
4911 // 14496-3 4.5.1.1 extensionFlag
4912 case AOT_AAC_LC:
4913 extensionFlag = 0;
4914 break;
4915 case AOT_ER_AAC_LC:
4916 case AOT_ER_AAC_SCAL:
4917 case AOT_ER_BSAC:
4918 case AOT_ER_AAC_LD:
4919 extensionFlag = 1;
4920 break;
4921 default:
4922 return ERROR_MALFORMED;
4923 break;
4924 }
4925 ALOGW("csd missing extension flag; assuming %d for object type %u.",
4926 extensionFlag, objectType);
4927 }
4928
4929 if (numChannels == 0) {
4930 int32_t channelsEffectiveNum = 0;
4931 int32_t channelsNum = 0;
4932 if (br.numBitsLeft() < 32) {
4933 return ERROR_MALFORMED;
4934 }
4935 br.skipBits(4); // ElementInstanceTag
4936 br.skipBits(2); // Profile
4937 br.skipBits(4); // SamplingFrequencyIndex
4938 const int32_t NumFrontChannelElements = br.getBits(4);
4939 const int32_t NumSideChannelElements = br.getBits(4);
4940 const int32_t NumBackChannelElements = br.getBits(4);
4941 const int32_t NumLfeChannelElements = br.getBits(2);
4942 br.skipBits(3); // NumAssocDataElements
4943 br.skipBits(4); // NumValidCcElements
4944
4945 const int32_t MonoMixdownPresent = br.getBits(1);
4946
4947 if (MonoMixdownPresent != 0) {
4948 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4949 br.skipBits(4); // MonoMixdownElementNumber
4950 }
4951
4952 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
4953 const int32_t StereoMixdownPresent = br.getBits(1);
4954 if (StereoMixdownPresent != 0) {
4955 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4956 br.skipBits(4); // StereoMixdownElementNumber
4957 }
4958
4959 if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
4960 const int32_t MatrixMixdownIndexPresent = br.getBits(1);
4961 if (MatrixMixdownIndexPresent != 0) {
4962 if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
4963 br.skipBits(2); // MatrixMixdownIndex
4964 br.skipBits(1); // PseudoSurroundEnable
4965 }
4966
4967 int i;
4968 for (i=0; i < NumFrontChannelElements; i++) {
4969 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4970 const int32_t FrontElementIsCpe = br.getBits(1);
4971 br.skipBits(4); // FrontElementTagSelect
4972 channelsNum += FrontElementIsCpe ? 2 : 1;
4973 }
4974
4975 for (i=0; i < NumSideChannelElements; i++) {
4976 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4977 const int32_t SideElementIsCpe = br.getBits(1);
4978 br.skipBits(4); // SideElementTagSelect
4979 channelsNum += SideElementIsCpe ? 2 : 1;
4980 }
4981
4982 for (i=0; i < NumBackChannelElements; i++) {
4983 if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
4984 const int32_t BackElementIsCpe = br.getBits(1);
4985 br.skipBits(4); // BackElementTagSelect
4986 channelsNum += BackElementIsCpe ? 2 : 1;
4987 }
4988 channelsEffectiveNum = channelsNum;
4989
4990 for (i=0; i < NumLfeChannelElements; i++) {
4991 if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
4992 br.skipBits(4); // LfeElementTagSelect
4993 channelsNum += 1;
4994 }
4995 ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
4996 ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
4997 numChannels = channelsNum;
4998 }
4999 }
5000 }
5001
5002 if (numChannels == 0) {
5003 return ERROR_UNSUPPORTED;
5004 }
5005
5006 if (mLastTrack == NULL)
5007 return ERROR_MALFORMED;
5008
5009 int32_t prevSampleRate;
5010 CHECK(AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &prevSampleRate));
5011
5012 if (prevSampleRate != sampleRate) {
5013 ALOGV("mpeg4 audio sample rate different from previous setting. "
5014 "was: %d, now: %d", prevSampleRate, sampleRate);
5015 }
5016
5017 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sampleRate);
5018
5019 int32_t prevChannelCount;
5020 CHECK(AMediaFormat_getInt32(mLastTrack->meta,
5021 AMEDIAFORMAT_KEY_CHANNEL_COUNT, &prevChannelCount));
5022
5023 if (prevChannelCount != numChannels) {
5024 ALOGV("mpeg4 audio channel count different from previous setting. "
5025 "was: %d, now: %d", prevChannelCount, numChannels);
5026 }
5027
5028 AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, numChannels);
5029
5030 return OK;
5031 }
5032
adjustRawDefaultFrameSize()5033 void MPEG4Extractor::adjustRawDefaultFrameSize() {
5034 int32_t chanCount = 0;
5035 int32_t bitWidth = 0;
5036 const char *mimeStr = NULL;
5037
5038 if(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mimeStr) &&
5039 !strcasecmp(mimeStr, MEDIA_MIMETYPE_AUDIO_RAW) &&
5040 AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &chanCount) &&
5041 AMediaFormat_getInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitWidth)) {
5042 // samplesize in stsz may not right , so updade default samplesize
5043 mLastTrack->sampleTable->setPredictSampleSize(chanCount * bitWidth / 8);
5044 }
5045 }
5046
5047 ////////////////////////////////////////////////////////////////////////////////
5048
MPEG4Source(AMediaFormat * format,DataSourceHelper * dataSource,int32_t timeScale,const sp<SampleTable> & sampleTable,Vector<SidxEntry> & sidx,const Trex * trex,off64_t firstMoofOffset,const sp<ItemTable> & itemTable,uint64_t elstShiftStartTicks,uint64_t elstInitialEmptyEditTicks)5049 MPEG4Source::MPEG4Source(
5050 AMediaFormat *format,
5051 DataSourceHelper *dataSource,
5052 int32_t timeScale,
5053 const sp<SampleTable> &sampleTable,
5054 Vector<SidxEntry> &sidx,
5055 const Trex *trex,
5056 off64_t firstMoofOffset,
5057 const sp<ItemTable> &itemTable,
5058 uint64_t elstShiftStartTicks,
5059 uint64_t elstInitialEmptyEditTicks)
5060 : mFormat(format),
5061 mDataSource(dataSource),
5062 mTimescale(timeScale),
5063 mSampleTable(sampleTable),
5064 mCurrentSampleIndex(0),
5065 mCurrentFragmentIndex(0),
5066 mSegments(sidx),
5067 mTrex(trex),
5068 mFirstMoofOffset(firstMoofOffset),
5069 mCurrentMoofOffset(firstMoofOffset),
5070 mCurrentMoofSize(0),
5071 mNextMoofOffset(-1),
5072 mCurrentTime(0),
5073 mDefaultEncryptedByteBlock(0),
5074 mDefaultSkipByteBlock(0),
5075 mCurrentSampleInfoAllocSize(0),
5076 mCurrentSampleInfoSizes(NULL),
5077 mCurrentSampleInfoOffsetsAllocSize(0),
5078 mCurrentSampleInfoOffsets(NULL),
5079 mIsAVC(false),
5080 mIsHEVC(false),
5081 mIsDolbyVision(false),
5082 mIsAC4(false),
5083 mIsPcm(false),
5084 mNALLengthSize(0),
5085 mStarted(false),
5086 mBuffer(NULL),
5087 mSrcBufferSize(0),
5088 mSrcBuffer(NULL),
5089 mItemTable(itemTable),
5090 mElstShiftStartTicks(elstShiftStartTicks),
5091 mElstInitialEmptyEditTicks(elstInitialEmptyEditTicks) {
5092
5093 memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
5094
5095 AMediaFormat_getInt32(mFormat,
5096 AMEDIAFORMAT_KEY_CRYPTO_MODE, &mCryptoMode);
5097 mDefaultIVSize = 0;
5098 AMediaFormat_getInt32(mFormat,
5099 AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &mDefaultIVSize);
5100 void *key;
5101 size_t keysize;
5102 if (AMediaFormat_getBuffer(mFormat,
5103 AMEDIAFORMAT_KEY_CRYPTO_KEY, &key, &keysize)) {
5104 CHECK(keysize <= 16);
5105 memset(mCryptoKey, 0, 16);
5106 memcpy(mCryptoKey, key, keysize);
5107 }
5108
5109 AMediaFormat_getInt32(mFormat,
5110 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, &mDefaultEncryptedByteBlock);
5111 AMediaFormat_getInt32(mFormat,
5112 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, &mDefaultSkipByteBlock);
5113
5114 const char *mime;
5115 bool success = AMediaFormat_getString(mFormat, AMEDIAFORMAT_KEY_MIME, &mime);
5116 CHECK(success);
5117
5118 mIsMpegH = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_MPEGH_MHA1) ||
5119 !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_MPEGH_MHM1);
5120 mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
5121 mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
5122 !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
5123 mIsAC4 = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AC4);
5124 mIsDolbyVision = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION);
5125 mIsHeif = !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) && mItemTable != NULL;
5126 mIsAvif = !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_AVIF) && mItemTable != NULL;
5127
5128 if (mIsAVC) {
5129 void *data;
5130 size_t size;
5131 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
5132
5133 const uint8_t *ptr = (const uint8_t *)data;
5134
5135 CHECK(size >= 7);
5136 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
5137
5138 // The number of bytes used to encode the length of a NAL unit.
5139 mNALLengthSize = 1 + (ptr[4] & 3);
5140 } else if (mIsHEVC) {
5141 void *data;
5142 size_t size;
5143 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));
5144
5145 const uint8_t *ptr = (const uint8_t *)data;
5146
5147 CHECK(size >= 22);
5148 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
5149
5150 mNALLengthSize = 1 + (ptr[14 + 7] & 3);
5151 } else if (mIsDolbyVision) {
5152 ALOGV("%s DolbyVision stream detected", __FUNCTION__);
5153 void *data;
5154 size_t size;
5155 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_2, &data, &size));
5156
5157 const uint8_t *ptr = (const uint8_t *)data;
5158
5159 CHECK(size == 24);
5160
5161 // dv_major.dv_minor Should be 1.0 or 2.1
5162 CHECK(!((ptr[0] != 1 || ptr[1] != 0) && (ptr[0] != 2 || ptr[1] != 1)));
5163
5164 const uint8_t profile = ptr[2] >> 1;
5165 // profile == (unknown,1,9) --> AVC; profile = (2,3,4,5,6,7,8) --> HEVC;
5166 // profile == (10) --> AV1
5167 if (profile > 1 && profile < 9) {
5168 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));
5169
5170 const uint8_t *ptr = (const uint8_t *)data;
5171
5172 CHECK(size >= 22);
5173 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
5174
5175 mNALLengthSize = 1 + (ptr[14 + 7] & 3);
5176 } else if (10 == profile) {
5177 /* AV1 profile nothing to do */
5178 } else {
5179 CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
5180 const uint8_t *ptr = (const uint8_t *)data;
5181
5182 CHECK(size >= 7);
5183 CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
5184 // The number of bytes used to encode the length of a NAL unit.
5185 mNALLengthSize = 1 + (ptr[4] & 3);
5186 }
5187 }
5188
5189 mIsPcm = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_RAW);
5190 mIsAudio = !strncasecmp(mime, "audio/", 6);
5191
5192 int32_t aacObjectType = -1;
5193
5194 if (AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_AAC_PROFILE, &aacObjectType)) {
5195 mIsUsac = (aacObjectType == AOT_USAC);
5196 }
5197
5198 if (mIsPcm) {
5199 int32_t numChannels = 0;
5200 int32_t bitsPerSample = 0;
5201 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, &bitsPerSample));
5202 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CHANNEL_COUNT, &numChannels));
5203
5204 int32_t bytesPerSample = bitsPerSample >> 3;
5205 int32_t pcmSampleSize = bytesPerSample * numChannels;
5206
5207 size_t maxSampleSize;
5208 status_t err = mSampleTable->getMaxSampleSize(&maxSampleSize);
5209 if (err != OK || maxSampleSize != static_cast<size_t>(pcmSampleSize)
5210 || bitsPerSample != 16) {
5211 // Not supported
5212 mIsPcm = false;
5213 } else {
5214 AMediaFormat_setInt32(mFormat,
5215 AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, pcmSampleSize * kMaxPcmFrameSize);
5216 }
5217 }
5218
5219 CHECK(AMediaFormat_getInt32(format, AMEDIAFORMAT_KEY_TRACK_ID, &mTrackId));
5220 }
5221
init()5222 status_t MPEG4Source::init() {
5223 if (mFirstMoofOffset != 0) {
5224 off64_t offset = mFirstMoofOffset;
5225 return parseChunk(&offset);
5226 }
5227 return OK;
5228 }
5229
~MPEG4Source()5230 MPEG4Source::~MPEG4Source() {
5231 if (mStarted) {
5232 stop();
5233 }
5234 free(mCurrentSampleInfoSizes);
5235 free(mCurrentSampleInfoOffsets);
5236 }
5237
start()5238 media_status_t MPEG4Source::start() {
5239 Mutex::Autolock autoLock(mLock);
5240
5241 CHECK(!mStarted);
5242
5243 int32_t tmp;
5244 CHECK(AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &tmp));
5245 size_t max_size = tmp;
5246
5247 // A somewhat arbitrary limit that should be sufficient for 8k video frames
5248 // If you see the message below for a valid input stream: increase the limit
5249 const size_t kMaxBufferSize = 64 * 1024 * 1024;
5250 if (max_size > kMaxBufferSize) {
5251 ALOGE("bogus max input size: %zu > %zu", max_size, kMaxBufferSize);
5252 return AMEDIA_ERROR_MALFORMED;
5253 }
5254 if (max_size == 0) {
5255 ALOGE("zero max input size");
5256 return AMEDIA_ERROR_MALFORMED;
5257 }
5258
5259 // Allow up to kMaxBuffers, but not if the total exceeds kMaxBufferSize.
5260 const size_t kInitialBuffers = 2;
5261 const size_t kMaxBuffers = 8;
5262 const size_t realMaxBuffers = min(kMaxBufferSize / max_size, kMaxBuffers);
5263 mBufferGroup->init(kInitialBuffers, max_size, realMaxBuffers);
5264 mSrcBuffer = new (std::nothrow) uint8_t[max_size];
5265 if (mSrcBuffer == NULL) {
5266 // file probably specified a bad max size
5267 return AMEDIA_ERROR_MALFORMED;
5268 }
5269 mSrcBufferSize = max_size;
5270
5271 mStarted = true;
5272
5273 return AMEDIA_OK;
5274 }
5275
stop()5276 media_status_t MPEG4Source::stop() {
5277 Mutex::Autolock autoLock(mLock);
5278
5279 CHECK(mStarted);
5280
5281 if (mBuffer != NULL) {
5282 mBuffer->release();
5283 mBuffer = NULL;
5284 }
5285
5286 mSrcBufferSize = 0;
5287 delete[] mSrcBuffer;
5288 mSrcBuffer = NULL;
5289
5290 mStarted = false;
5291 mCurrentSampleIndex = 0;
5292
5293 return AMEDIA_OK;
5294 }
5295
parseChunk(off64_t * offset)5296 status_t MPEG4Source::parseChunk(off64_t *offset) {
5297 uint32_t hdr[2];
5298 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
5299 return ERROR_IO;
5300 }
5301 uint64_t chunk_size = ntohl(hdr[0]);
5302 uint32_t chunk_type = ntohl(hdr[1]);
5303 off64_t data_offset = *offset + 8;
5304
5305 if (chunk_size == 1) {
5306 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
5307 return ERROR_IO;
5308 }
5309 chunk_size = ntoh64(chunk_size);
5310 data_offset += 8;
5311
5312 if (chunk_size < 16) {
5313 // The smallest valid chunk is 16 bytes long in this case.
5314 return ERROR_MALFORMED;
5315 }
5316 } else if (chunk_size < 8) {
5317 // The smallest valid chunk is 8 bytes long.
5318 return ERROR_MALFORMED;
5319 }
5320
5321 char chunk[5];
5322 MakeFourCCString(chunk_type, chunk);
5323 ALOGV("MPEG4Source chunk %s @ %#llx", chunk, (long long)*offset);
5324
5325 off64_t chunk_data_size = *offset + chunk_size - data_offset;
5326
5327 switch(chunk_type) {
5328
5329 case FOURCC("traf"):
5330 case FOURCC("moof"): {
5331 off64_t stop_offset = *offset + chunk_size;
5332 *offset = data_offset;
5333 if (chunk_type == FOURCC("moof")) {
5334 mCurrentMoofSize = chunk_data_size;
5335 }
5336 while (*offset < stop_offset) {
5337 status_t err = parseChunk(offset);
5338 if (err != OK) {
5339 return err;
5340 }
5341 }
5342 if (chunk_type == FOURCC("moof")) {
5343 // *offset points to the box following this moof. Find the next moof from there.
5344
5345 while (true) {
5346 if (mDataSource->readAt(*offset, hdr, 8) < 8) {
5347 // no more box to the end of file.
5348 break;
5349 }
5350 chunk_size = ntohl(hdr[0]);
5351 chunk_type = ntohl(hdr[1]);
5352 if (chunk_size == 1) {
5353 // ISO/IEC 14496-12:2012, 8.8.4 Movie Fragment Box, moof is a Box
5354 // which is defined in 4.2 Object Structure.
5355 // When chunk_size==1, 8 bytes follows as "largesize".
5356 if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
5357 return ERROR_IO;
5358 }
5359 chunk_size = ntoh64(chunk_size);
5360 if (chunk_size < 16) {
5361 // The smallest valid chunk is 16 bytes long in this case.
5362 return ERROR_MALFORMED;
5363 }
5364 } else if (chunk_size == 0) {
5365 // next box extends to end of file.
5366 } else if (chunk_size < 8) {
5367 // The smallest valid chunk is 8 bytes long in this case.
5368 return ERROR_MALFORMED;
5369 }
5370
5371 if (chunk_type == FOURCC("moof")) {
5372 mNextMoofOffset = *offset;
5373 break;
5374 } else if (chunk_type == FOURCC("mdat")) {
5375 parseChunk(offset);
5376 continue;
5377 } else if (chunk_size == 0) {
5378 break;
5379 }
5380 *offset += chunk_size;
5381 }
5382 }
5383 break;
5384 }
5385
5386 case FOURCC("tfhd"): {
5387 status_t err;
5388 if ((err = parseTrackFragmentHeader(data_offset, chunk_data_size)) != OK) {
5389 return err;
5390 }
5391 *offset += chunk_size;
5392 break;
5393 }
5394
5395 case FOURCC("trun"): {
5396 status_t err;
5397 if (mLastParsedTrackId == mTrackId) {
5398 if ((err = parseTrackFragmentRun(data_offset, chunk_data_size)) != OK) {
5399 return err;
5400 }
5401 }
5402
5403 *offset += chunk_size;
5404 break;
5405 }
5406
5407 case FOURCC("saiz"): {
5408 status_t err;
5409 if ((err = parseSampleAuxiliaryInformationSizes(data_offset, chunk_data_size)) != OK) {
5410 return err;
5411 }
5412 *offset += chunk_size;
5413 break;
5414 }
5415 case FOURCC("saio"): {
5416 status_t err;
5417 if ((err = parseSampleAuxiliaryInformationOffsets(data_offset, chunk_data_size))
5418 != OK) {
5419 return err;
5420 }
5421 *offset += chunk_size;
5422 break;
5423 }
5424
5425 case FOURCC("senc"): {
5426 status_t err;
5427 if ((err = parseSampleEncryption(data_offset, chunk_data_size)) != OK) {
5428 return err;
5429 }
5430 *offset += chunk_size;
5431 break;
5432 }
5433
5434 case FOURCC("mdat"): {
5435 // parse DRM info if present
5436 ALOGV("MPEG4Source::parseChunk mdat");
5437 // if saiz/saoi was previously observed, do something with the sampleinfos
5438 status_t err = OK;
5439 auto kv = mDrmOffsets.lower_bound(*offset);
5440 if (kv != mDrmOffsets.end()) {
5441 auto drmoffset = kv->first;
5442 auto flags = kv->second;
5443 mDrmOffsets.erase(kv);
5444 ALOGV("mdat chunk_size %" PRIu64 " drmoffset %" PRId64 " offset %" PRId64,
5445 chunk_size, drmoffset, *offset);
5446 if (chunk_size >= drmoffset - *offset) {
5447 err = parseClearEncryptedSizes(drmoffset, false, flags,
5448 chunk_size - (drmoffset - *offset));
5449 }
5450 }
5451 if (err != OK) {
5452 return err;
5453 }
5454 *offset += chunk_size;
5455 break;
5456 }
5457
5458 default: {
5459 *offset += chunk_size;
5460 break;
5461 }
5462 }
5463 return OK;
5464 }
5465
parseSampleAuxiliaryInformationSizes(off64_t offset,off64_t size)5466 status_t MPEG4Source::parseSampleAuxiliaryInformationSizes(
5467 off64_t offset, off64_t size) {
5468 ALOGV("parseSampleAuxiliaryInformationSizes");
5469 if (size < 9) {
5470 return -EINVAL;
5471 }
5472 // 14496-12 8.7.12
5473 uint8_t version;
5474 if (mDataSource->readAt(
5475 offset, &version, sizeof(version))
5476 < (ssize_t)sizeof(version)) {
5477 return ERROR_IO;
5478 }
5479
5480 if (version != 0) {
5481 return ERROR_UNSUPPORTED;
5482 }
5483 offset++;
5484 size--;
5485
5486 uint32_t flags;
5487 if (!mDataSource->getUInt24(offset, &flags)) {
5488 return ERROR_IO;
5489 }
5490 offset += 3;
5491 size -= 3;
5492
5493 if (flags & 1) {
5494 if (size < 13) {
5495 return -EINVAL;
5496 }
5497 uint32_t tmp;
5498 if (!mDataSource->getUInt32(offset, &tmp)) {
5499 return ERROR_MALFORMED;
5500 }
5501 mCurrentAuxInfoType = tmp;
5502 offset += 4;
5503 size -= 4;
5504 if (!mDataSource->getUInt32(offset, &tmp)) {
5505 return ERROR_MALFORMED;
5506 }
5507 mCurrentAuxInfoTypeParameter = tmp;
5508 offset += 4;
5509 size -= 4;
5510 }
5511
5512 uint8_t defsize;
5513 if (mDataSource->readAt(offset, &defsize, 1) != 1) {
5514 return ERROR_MALFORMED;
5515 }
5516 mCurrentDefaultSampleInfoSize = defsize;
5517 offset++;
5518 size--;
5519
5520 uint32_t smplcnt;
5521 if (!mDataSource->getUInt32(offset, &smplcnt)) {
5522 return ERROR_MALFORMED;
5523 }
5524 mCurrentSampleInfoCount = smplcnt;
5525 offset += 4;
5526 size -= 4;
5527 if (mCurrentDefaultSampleInfoSize != 0) {
5528 ALOGV("@@@@ using default sample info size of %d", mCurrentDefaultSampleInfoSize);
5529 return OK;
5530 }
5531 if(smplcnt > size) {
5532 ALOGW("b/124525515 - smplcnt(%u) > size(%ld)", (unsigned int)smplcnt, (unsigned long)size);
5533 android_errorWriteLog(0x534e4554, "124525515");
5534 return -EINVAL;
5535 }
5536 if (smplcnt > mCurrentSampleInfoAllocSize) {
5537 uint8_t * newPtr = (uint8_t*) realloc(mCurrentSampleInfoSizes, smplcnt);
5538 if (newPtr == NULL) {
5539 ALOGE("failed to realloc %u -> %u", mCurrentSampleInfoAllocSize, smplcnt);
5540 return NO_MEMORY;
5541 }
5542 mCurrentSampleInfoSizes = newPtr;
5543 mCurrentSampleInfoAllocSize = smplcnt;
5544 }
5545
5546 mDataSource->readAt(offset, mCurrentSampleInfoSizes, smplcnt);
5547 return OK;
5548 }
5549
parseSampleAuxiliaryInformationOffsets(off64_t offset,off64_t size)5550 status_t MPEG4Source::parseSampleAuxiliaryInformationOffsets(
5551 off64_t offset, off64_t size) {
5552 ALOGV("parseSampleAuxiliaryInformationOffsets");
5553 if (size < 8) {
5554 return -EINVAL;
5555 }
5556 // 14496-12 8.7.13
5557 uint8_t version;
5558 if (mDataSource->readAt(offset, &version, sizeof(version)) != 1) {
5559 return ERROR_IO;
5560 }
5561 offset++;
5562 size--;
5563
5564 uint32_t flags;
5565 if (!mDataSource->getUInt24(offset, &flags)) {
5566 return ERROR_IO;
5567 }
5568 offset += 3;
5569 size -= 3;
5570
5571 uint32_t entrycount;
5572 if (!mDataSource->getUInt32(offset, &entrycount)) {
5573 return ERROR_IO;
5574 }
5575 offset += 4;
5576 size -= 4;
5577 if (entrycount == 0) {
5578 return OK;
5579 }
5580 if (entrycount > UINT32_MAX / 8) {
5581 return ERROR_MALFORMED;
5582 }
5583
5584 if (entrycount > mCurrentSampleInfoOffsetsAllocSize) {
5585 uint64_t *newPtr = (uint64_t *)realloc(mCurrentSampleInfoOffsets, entrycount * 8);
5586 if (newPtr == NULL) {
5587 ALOGE("failed to realloc %u -> %u",
5588 mCurrentSampleInfoOffsetsAllocSize, entrycount * 8);
5589 return NO_MEMORY;
5590 }
5591 mCurrentSampleInfoOffsets = newPtr;
5592 mCurrentSampleInfoOffsetsAllocSize = entrycount;
5593 }
5594 mCurrentSampleInfoOffsetCount = entrycount;
5595
5596 if (mCurrentSampleInfoOffsets == NULL) {
5597 return OK;
5598 }
5599
5600 for (size_t i = 0; i < entrycount; i++) {
5601 if (version == 0) {
5602 if (size < 4) {
5603 ALOGW("b/124526959");
5604 android_errorWriteLog(0x534e4554, "124526959");
5605 return -EINVAL;
5606 }
5607 uint32_t tmp;
5608 if (!mDataSource->getUInt32(offset, &tmp)) {
5609 return ERROR_IO;
5610 }
5611 mCurrentSampleInfoOffsets[i] = tmp;
5612 offset += 4;
5613 size -= 4;
5614 } else {
5615 if (size < 8) {
5616 ALOGW("b/124526959");
5617 android_errorWriteLog(0x534e4554, "124526959");
5618 return -EINVAL;
5619 }
5620 uint64_t tmp;
5621 if (!mDataSource->getUInt64(offset, &tmp)) {
5622 return ERROR_IO;
5623 }
5624 mCurrentSampleInfoOffsets[i] = tmp;
5625 offset += 8;
5626 size -= 8;
5627 }
5628 }
5629
5630 // parse clear/encrypted data
5631
5632 off64_t drmoffset = mCurrentSampleInfoOffsets[0]; // from moof
5633
5634 drmoffset += mCurrentMoofOffset;
5635 mDrmOffsets[drmoffset] = flags;
5636 ALOGV("saio drmoffset %" PRId64 " flags %u", drmoffset, flags);
5637
5638 return OK;
5639 }
5640
parseClearEncryptedSizes(off64_t offset,bool isSampleEncryption,uint32_t flags,off64_t size)5641 status_t MPEG4Source::parseClearEncryptedSizes(
5642 off64_t offset, bool isSampleEncryption, uint32_t flags, off64_t size) {
5643
5644 int32_t ivlength;
5645 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, &ivlength)) {
5646 return ERROR_MALFORMED;
5647 }
5648
5649 // only 0, 8 and 16 byte initialization vectors are supported
5650 if (ivlength != 0 && ivlength != 8 && ivlength != 16) {
5651 ALOGW("unsupported IV length: %d", ivlength);
5652 return ERROR_MALFORMED;
5653 }
5654
5655 uint32_t sampleCount = mCurrentSampleInfoCount;
5656 if (isSampleEncryption) {
5657 if (size < 4) {
5658 return ERROR_MALFORMED;
5659 }
5660 if (!mDataSource->getUInt32(offset, &sampleCount)) {
5661 return ERROR_IO;
5662 }
5663 offset += 4;
5664 size -= 4;
5665 }
5666
5667 // read CencSampleAuxiliaryDataFormats
5668 for (size_t i = 0; i < sampleCount; i++) {
5669 if (i >= mCurrentSamples.size()) {
5670 ALOGW("too few samples");
5671 break;
5672 }
5673 Sample *smpl = &mCurrentSamples.editItemAt(i);
5674 if (!smpl->clearsizes.isEmpty()) {
5675 continue;
5676 }
5677
5678 memset(smpl->iv, 0, 16);
5679 if (size < ivlength) {
5680 return ERROR_MALFORMED;
5681 }
5682 if (mDataSource->readAt(offset, smpl->iv, ivlength) != ivlength) {
5683 return ERROR_IO;
5684 }
5685
5686 offset += ivlength;
5687 size -= ivlength;
5688
5689 bool readSubsamples;
5690 if (isSampleEncryption) {
5691 readSubsamples = flags & 2;
5692 } else {
5693 int32_t smplinfosize = mCurrentDefaultSampleInfoSize;
5694 if (smplinfosize == 0) {
5695 smplinfosize = mCurrentSampleInfoSizes[i];
5696 }
5697 readSubsamples = smplinfosize > ivlength;
5698 }
5699
5700 if (readSubsamples) {
5701 uint16_t numsubsamples;
5702 if (size < 2) {
5703 return ERROR_MALFORMED;
5704 }
5705 if (!mDataSource->getUInt16(offset, &numsubsamples)) {
5706 return ERROR_IO;
5707 }
5708 offset += 2;
5709 size -= 2;
5710 for (size_t j = 0; j < numsubsamples; j++) {
5711 uint16_t numclear;
5712 uint32_t numencrypted;
5713 if (size < 6) {
5714 return ERROR_MALFORMED;
5715 }
5716 if (!mDataSource->getUInt16(offset, &numclear)) {
5717 return ERROR_IO;
5718 }
5719 offset += 2;
5720 if (!mDataSource->getUInt32(offset, &numencrypted)) {
5721 return ERROR_IO;
5722 }
5723 offset += 4;
5724 size -= 6;
5725 smpl->clearsizes.add(numclear);
5726 smpl->encryptedsizes.add(numencrypted);
5727 }
5728 } else {
5729 smpl->clearsizes.add(0);
5730 smpl->encryptedsizes.add(smpl->size);
5731 }
5732 }
5733
5734 return OK;
5735 }
5736
parseSampleEncryption(off64_t offset,off64_t chunk_data_size)5737 status_t MPEG4Source::parseSampleEncryption(off64_t offset, off64_t chunk_data_size) {
5738 uint32_t flags;
5739 if (chunk_data_size < 4) {
5740 return ERROR_MALFORMED;
5741 }
5742 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5743 return ERROR_MALFORMED;
5744 }
5745 return parseClearEncryptedSizes(offset + 4, true, flags, chunk_data_size - 4);
5746 }
5747
parseTrackFragmentHeader(off64_t offset,off64_t size)5748 status_t MPEG4Source::parseTrackFragmentHeader(off64_t offset, off64_t size) {
5749
5750 if (size < 8) {
5751 return -EINVAL;
5752 }
5753
5754 uint32_t flags;
5755 if (!mDataSource->getUInt32(offset, &flags)) { // actually version + flags
5756 return ERROR_MALFORMED;
5757 }
5758
5759 if (flags & 0xff000000) {
5760 return -EINVAL;
5761 }
5762
5763 if (!mDataSource->getUInt32(offset + 4, (uint32_t*)&mLastParsedTrackId)) {
5764 return ERROR_MALFORMED;
5765 }
5766
5767 if (mLastParsedTrackId != mTrackId) {
5768 // this is not the right track, skip it
5769 return OK;
5770 }
5771
5772 mTrackFragmentHeaderInfo.mFlags = flags;
5773 mTrackFragmentHeaderInfo.mTrackID = mLastParsedTrackId;
5774 offset += 8;
5775 size -= 8;
5776
5777 ALOGV("fragment header: %08x %08x", flags, mTrackFragmentHeaderInfo.mTrackID);
5778
5779 if (flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent) {
5780 if (size < 8) {
5781 return -EINVAL;
5782 }
5783
5784 if (!mDataSource->getUInt64(offset, &mTrackFragmentHeaderInfo.mBaseDataOffset)) {
5785 return ERROR_MALFORMED;
5786 }
5787 offset += 8;
5788 size -= 8;
5789 }
5790
5791 if (flags & TrackFragmentHeaderInfo::kSampleDescriptionIndexPresent) {
5792 if (size < 4) {
5793 return -EINVAL;
5794 }
5795
5796 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mSampleDescriptionIndex)) {
5797 return ERROR_MALFORMED;
5798 }
5799 offset += 4;
5800 size -= 4;
5801 }
5802
5803 if (flags & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5804 if (size < 4) {
5805 return -EINVAL;
5806 }
5807
5808 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleDuration)) {
5809 return ERROR_MALFORMED;
5810 }
5811 offset += 4;
5812 size -= 4;
5813 }
5814
5815 if (flags & TrackFragmentHeaderInfo::kDefaultSampleSizePresent) {
5816 if (size < 4) {
5817 return -EINVAL;
5818 }
5819
5820 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleSize)) {
5821 return ERROR_MALFORMED;
5822 }
5823 offset += 4;
5824 size -= 4;
5825 }
5826
5827 if (flags & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent) {
5828 if (size < 4) {
5829 return -EINVAL;
5830 }
5831
5832 if (!mDataSource->getUInt32(offset, &mTrackFragmentHeaderInfo.mDefaultSampleFlags)) {
5833 return ERROR_MALFORMED;
5834 }
5835 offset += 4;
5836 size -= 4;
5837 }
5838
5839 if (!(flags & TrackFragmentHeaderInfo::kBaseDataOffsetPresent)) {
5840 mTrackFragmentHeaderInfo.mBaseDataOffset = mCurrentMoofOffset;
5841 }
5842
5843 mTrackFragmentHeaderInfo.mDataOffset = 0;
5844 return OK;
5845 }
5846
parseTrackFragmentRun(off64_t offset,off64_t size)5847 status_t MPEG4Source::parseTrackFragmentRun(off64_t offset, off64_t size) {
5848
5849 ALOGV("MPEG4Source::parseTrackFragmentRun");
5850 if (size < 8) {
5851 return -EINVAL;
5852 }
5853
5854 enum {
5855 kDataOffsetPresent = 0x01,
5856 kFirstSampleFlagsPresent = 0x04,
5857 kSampleDurationPresent = 0x100,
5858 kSampleSizePresent = 0x200,
5859 kSampleFlagsPresent = 0x400,
5860 kSampleCompositionTimeOffsetPresent = 0x800,
5861 };
5862
5863 uint32_t flags;
5864 if (!mDataSource->getUInt32(offset, &flags)) {
5865 return ERROR_MALFORMED;
5866 }
5867 // |version| only affects SampleCompositionTimeOffset field.
5868 // If version == 0, SampleCompositionTimeOffset is uint32_t;
5869 // Otherwise, SampleCompositionTimeOffset is int32_t.
5870 // Sample.compositionOffset is defined as int32_t.
5871 uint8_t version = flags >> 24;
5872 flags &= 0xffffff;
5873 ALOGV("fragment run version: 0x%02x, flags: 0x%06x", version, flags);
5874
5875 if ((flags & kFirstSampleFlagsPresent) && (flags & kSampleFlagsPresent)) {
5876 // These two shall not be used together.
5877 return -EINVAL;
5878 }
5879
5880 uint32_t sampleCount;
5881 if (!mDataSource->getUInt32(offset + 4, &sampleCount)) {
5882 return ERROR_MALFORMED;
5883 }
5884 offset += 8;
5885 size -= 8;
5886
5887 uint64_t dataOffset = mTrackFragmentHeaderInfo.mDataOffset;
5888
5889 uint32_t firstSampleFlags = 0;
5890
5891 if (flags & kDataOffsetPresent) {
5892 if (size < 4) {
5893 return -EINVAL;
5894 }
5895
5896 int32_t dataOffsetDelta;
5897 if (!mDataSource->getUInt32(offset, (uint32_t*)&dataOffsetDelta)) {
5898 return ERROR_MALFORMED;
5899 }
5900
5901 dataOffset = mTrackFragmentHeaderInfo.mBaseDataOffset + dataOffsetDelta;
5902
5903 offset += 4;
5904 size -= 4;
5905 }
5906
5907 if (flags & kFirstSampleFlagsPresent) {
5908 if (size < 4) {
5909 return -EINVAL;
5910 }
5911
5912 if (!mDataSource->getUInt32(offset, &firstSampleFlags)) {
5913 return ERROR_MALFORMED;
5914 }
5915 offset += 4;
5916 size -= 4;
5917 }
5918
5919 uint32_t sampleDuration = 0, sampleSize = 0, sampleFlags = 0,
5920 sampleCtsOffset = 0;
5921
5922 size_t bytesPerSample = 0;
5923 if (flags & kSampleDurationPresent) {
5924 bytesPerSample += 4;
5925 } else if (mTrackFragmentHeaderInfo.mFlags
5926 & TrackFragmentHeaderInfo::kDefaultSampleDurationPresent) {
5927 sampleDuration = mTrackFragmentHeaderInfo.mDefaultSampleDuration;
5928 } else if (mTrex) {
5929 sampleDuration = mTrex->default_sample_duration;
5930 }
5931
5932 if (flags & kSampleSizePresent) {
5933 bytesPerSample += 4;
5934 } else {
5935 sampleSize = mTrackFragmentHeaderInfo.mDefaultSampleSize;
5936 #ifdef VERY_VERY_VERBOSE_LOGGING
5937 // We don't expect this, but also want to avoid spamming the log if
5938 // we hit this case.
5939 if (!(mTrackFragmentHeaderInfo.mFlags
5940 & TrackFragmentHeaderInfo::kDefaultSampleSizePresent)) {
5941 ALOGW("No sample size specified");
5942 }
5943 #endif
5944 }
5945
5946 if (flags & kSampleFlagsPresent) {
5947 bytesPerSample += 4;
5948 } else {
5949 sampleFlags = mTrackFragmentHeaderInfo.mDefaultSampleFlags;
5950 #ifdef VERY_VERY_VERBOSE_LOGGING
5951 // We don't expect this, but also want to avoid spamming the log if
5952 // we hit this case.
5953 if (!(mTrackFragmentHeaderInfo.mFlags
5954 & TrackFragmentHeaderInfo::kDefaultSampleFlagsPresent)) {
5955 ALOGW("No sample flags specified");
5956 }
5957 #endif
5958 }
5959
5960 if (flags & kSampleCompositionTimeOffsetPresent) {
5961 bytesPerSample += 4;
5962 } else {
5963 sampleCtsOffset = 0;
5964 }
5965
5966 if (bytesPerSample != 0) {
5967 if (size < (off64_t)sampleCount * bytesPerSample) {
5968 return -EINVAL;
5969 }
5970 } else {
5971 if (sampleDuration == 0) {
5972 ALOGW("b/123389881 sampleDuration == 0");
5973 android_errorWriteLog(0x534e4554, "124389881 zero");
5974 return -EINVAL;
5975 }
5976
5977 // apply some quick (vs strict legality) checks
5978 //
5979 static constexpr uint32_t kMaxTrunSampleCount = 10000;
5980 if (sampleCount > kMaxTrunSampleCount) {
5981 ALOGW("b/123389881 sampleCount(%u) > kMaxTrunSampleCount(%u)",
5982 sampleCount, kMaxTrunSampleCount);
5983 android_errorWriteLog(0x534e4554, "124389881 count");
5984 return -EINVAL;
5985 }
5986 }
5987
5988 Sample tmp;
5989 for (uint32_t i = 0; i < sampleCount; ++i) {
5990 if (flags & kSampleDurationPresent) {
5991 if (!mDataSource->getUInt32(offset, &sampleDuration)) {
5992 return ERROR_MALFORMED;
5993 }
5994 offset += 4;
5995 }
5996
5997 if (flags & kSampleSizePresent) {
5998 if (!mDataSource->getUInt32(offset, &sampleSize)) {
5999 return ERROR_MALFORMED;
6000 }
6001 offset += 4;
6002 }
6003
6004 if (flags & kSampleFlagsPresent) {
6005 if (!mDataSource->getUInt32(offset, &sampleFlags)) {
6006 return ERROR_MALFORMED;
6007 }
6008 offset += 4;
6009 }
6010
6011 if (flags & kSampleCompositionTimeOffsetPresent) {
6012 if (!mDataSource->getUInt32(offset, &sampleCtsOffset)) {
6013 return ERROR_MALFORMED;
6014 }
6015 offset += 4;
6016 }
6017
6018 ALOGV("adding sample %d at offset 0x%08" PRIx64 ", size %u, duration %u, "
6019 " flags 0x%08x ctsOffset %" PRIu32, i + 1,
6020 dataOffset, sampleSize, sampleDuration,
6021 (flags & kFirstSampleFlagsPresent) && i == 0
6022 ? firstSampleFlags : sampleFlags, sampleCtsOffset);
6023 tmp.offset = dataOffset;
6024 tmp.size = sampleSize;
6025 tmp.duration = sampleDuration;
6026 tmp.compositionOffset = sampleCtsOffset;
6027 memset(tmp.iv, 0, sizeof(tmp.iv));
6028 if (mCurrentSamples.add(tmp) < 0) {
6029 ALOGW("b/123389881 failed saving sample(n=%zu)", mCurrentSamples.size());
6030 android_errorWriteLog(0x534e4554, "124389881 allocation");
6031 mCurrentSamples.clear();
6032 return NO_MEMORY;
6033 }
6034
6035 dataOffset += sampleSize;
6036 }
6037
6038 mTrackFragmentHeaderInfo.mDataOffset = dataOffset;
6039
6040 return OK;
6041 }
6042
getFormat(AMediaFormat * meta)6043 media_status_t MPEG4Source::getFormat(AMediaFormat *meta) {
6044 Mutex::Autolock autoLock(mLock);
6045 AMediaFormat_copy(meta, mFormat);
6046 return AMEDIA_OK;
6047 }
6048
parseNALSize(const uint8_t * data) const6049 size_t MPEG4Source::parseNALSize(const uint8_t *data) const {
6050 switch (mNALLengthSize) {
6051 case 1:
6052 return *data;
6053 case 2:
6054 return U16_AT(data);
6055 case 3:
6056 return ((size_t)data[0] << 16) | U16_AT(&data[1]);
6057 case 4:
6058 return U32_AT(data);
6059 }
6060
6061 // This cannot happen, mNALLengthSize springs to life by adding 1 to
6062 // a 2-bit integer.
6063 CHECK(!"Should not be here.");
6064
6065 return 0;
6066 }
6067
parseHEVCLayerId(const uint8_t * data,size_t size)6068 int32_t MPEG4Source::parseHEVCLayerId(const uint8_t *data, size_t size) {
6069 if (data == nullptr || size < mNALLengthSize + 2) {
6070 return -1;
6071 }
6072
6073 // HEVC NAL-header (16-bit)
6074 // 1 6 6 3
6075 // |-|uuuuuu|------|iii|
6076 // ^ ^
6077 // NAL_type layer_id + 1
6078 //
6079 // Layer-id is non-zero only for Temporal Sub-layer Access pictures (TSA)
6080 enum {
6081 TSA_N = 2,
6082 TSA_R = 3,
6083 STSA_N = 4,
6084 STSA_R = 5,
6085 };
6086
6087 data += mNALLengthSize;
6088 uint16_t nalHeader = data[0] << 8 | data[1];
6089
6090 uint16_t nalType = (nalHeader >> 9) & 0x3Fu;
6091 if (nalType == TSA_N || nalType == TSA_R || nalType == STSA_N || nalType == STSA_R) {
6092 int32_t layerIdPlusOne = nalHeader & 0x7u;
6093 ALOGD_IF(layerIdPlusOne == 0, "got layerId 0 for TSA picture");
6094 return layerIdPlusOne - 1;
6095 }
6096 return 0;
6097 }
6098
read(MediaBufferHelper ** out,const ReadOptions * options)6099 media_status_t MPEG4Source::read(
6100 MediaBufferHelper **out, const ReadOptions *options) {
6101 Mutex::Autolock autoLock(mLock);
6102
6103 CHECK(mStarted);
6104
6105 if (options != nullptr && options->getNonBlocking() && !mBufferGroup->has_buffers()) {
6106 *out = nullptr;
6107 return AMEDIA_ERROR_WOULD_BLOCK;
6108 }
6109
6110 if (mFirstMoofOffset > 0) {
6111 return fragmentedRead(out, options);
6112 }
6113
6114 *out = NULL;
6115
6116 int64_t targetSampleTimeUs = -1;
6117
6118 int64_t seekTimeUs;
6119 ReadOptions::SeekMode mode;
6120
6121 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
6122 ALOGV("seekTimeUs:%" PRId64, seekTimeUs);
6123 if (mIsHeif || mIsAvif) {
6124 CHECK(mSampleTable == NULL);
6125 CHECK(mItemTable != NULL);
6126 int32_t imageIndex;
6127 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_TRACK_ID, &imageIndex)) {
6128 return AMEDIA_ERROR_MALFORMED;
6129 }
6130
6131 status_t err;
6132 if (seekTimeUs >= 0) {
6133 err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
6134 } else {
6135 err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
6136 }
6137 if (err != OK) {
6138 return AMEDIA_ERROR_UNKNOWN;
6139 }
6140 } else {
6141 uint32_t findFlags = 0;
6142 switch (mode) {
6143 case ReadOptions::SEEK_PREVIOUS_SYNC:
6144 findFlags = SampleTable::kFlagBefore;
6145 break;
6146 case ReadOptions::SEEK_NEXT_SYNC:
6147 findFlags = SampleTable::kFlagAfter;
6148 break;
6149 case ReadOptions::SEEK_CLOSEST_SYNC:
6150 case ReadOptions::SEEK_CLOSEST:
6151 findFlags = SampleTable::kFlagClosest;
6152 break;
6153 case ReadOptions::SEEK_FRAME_INDEX:
6154 findFlags = SampleTable::kFlagFrameIndex;
6155 break;
6156 default:
6157 CHECK(!"Should not be here.");
6158 break;
6159 }
6160 if( mode != ReadOptions::SEEK_FRAME_INDEX) {
6161 int64_t elstInitialEmptyEditUs = 0, elstShiftStartUs = 0;
6162 if (mElstInitialEmptyEditTicks > 0) {
6163 elstInitialEmptyEditUs = ((long double)mElstInitialEmptyEditTicks * 1000000) /
6164 mTimescale;
6165 /* Sample's composition time from ctts/stts entries are non-negative(>=0).
6166 * Hence, lower bound on seekTimeUs is 0.
6167 */
6168 seekTimeUs = std::max(seekTimeUs - elstInitialEmptyEditUs, (int64_t)0);
6169 }
6170 if (mElstShiftStartTicks > 0) {
6171 elstShiftStartUs = ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
6172 seekTimeUs += elstShiftStartUs;
6173 }
6174 ALOGV("shifted seekTimeUs:%" PRId64 ", elstInitialEmptyEditUs:%" PRIu64
6175 ", elstShiftStartUs:%" PRIu64, seekTimeUs, elstInitialEmptyEditUs,
6176 elstShiftStartUs);
6177 }
6178
6179 uint32_t sampleIndex;
6180 status_t err = mSampleTable->findSampleAtTime(
6181 seekTimeUs, 1000000, mTimescale,
6182 &sampleIndex, findFlags);
6183
6184 if (mode == ReadOptions::SEEK_CLOSEST
6185 || mode == ReadOptions::SEEK_FRAME_INDEX) {
6186 // We found the closest sample already, now we want the sync
6187 // sample preceding it (or the sample itself of course), even
6188 // if the subsequent sync sample is closer.
6189 findFlags = SampleTable::kFlagBefore;
6190 }
6191
6192 uint32_t syncSampleIndex = sampleIndex;
6193 // assume every non-USAC/non-MPEGH audio sample is a sync sample.
6194 // This works around
6195 // seek issues with files that were incorrectly written with an
6196 // empty or single-sample stss block for the audio track
6197 if (err == OK && (!mIsAudio || mIsUsac || mIsMpegH)) {
6198 err = mSampleTable->findSyncSampleNear(
6199 sampleIndex, &syncSampleIndex, findFlags);
6200 }
6201
6202 uint64_t sampleTime;
6203 if (err == OK) {
6204 err = mSampleTable->getMetaDataForSample(
6205 sampleIndex, NULL, NULL, &sampleTime);
6206 }
6207
6208 if (err != OK) {
6209 if (err == ERROR_OUT_OF_RANGE) {
6210 // An attempt to seek past the end of the stream would
6211 // normally cause this ERROR_OUT_OF_RANGE error. Propagating
6212 // this all the way to the MediaPlayer would cause abnormal
6213 // termination. Legacy behaviour appears to be to behave as if
6214 // we had seeked to the end of stream, ending normally.
6215 return AMEDIA_ERROR_END_OF_STREAM;
6216 }
6217 ALOGV("end of stream");
6218 return AMEDIA_ERROR_UNKNOWN;
6219 }
6220
6221 if (mode == ReadOptions::SEEK_CLOSEST
6222 || mode == ReadOptions::SEEK_FRAME_INDEX) {
6223 if (mElstInitialEmptyEditTicks > 0) {
6224 sampleTime += mElstInitialEmptyEditTicks;
6225 }
6226 if (mElstShiftStartTicks > 0){
6227 if (sampleTime > mElstShiftStartTicks) {
6228 sampleTime -= mElstShiftStartTicks;
6229 } else {
6230 sampleTime = 0;
6231 }
6232 }
6233 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
6234 }
6235
6236 #if 0
6237 uint32_t syncSampleTime;
6238 CHECK_EQ(OK, mSampleTable->getMetaDataForSample(
6239 syncSampleIndex, NULL, NULL, &syncSampleTime));
6240
6241 ALOGI("seek to time %lld us => sample at time %lld us, "
6242 "sync sample at time %lld us",
6243 seekTimeUs,
6244 sampleTime * 1000000ll / mTimescale,
6245 syncSampleTime * 1000000ll / mTimescale);
6246 #endif
6247
6248 mCurrentSampleIndex = syncSampleIndex;
6249 }
6250
6251 if (mBuffer != NULL) {
6252 mBuffer->release();
6253 mBuffer = NULL;
6254 }
6255
6256 // fall through
6257 }
6258
6259 off64_t offset = 0;
6260 size_t size = 0;
6261 int64_t cts;
6262 uint64_t stts;
6263 bool isSyncSample;
6264 bool newBuffer = false;
6265 if (mBuffer == NULL) {
6266 newBuffer = true;
6267
6268 status_t err;
6269 if (!mIsHeif && !mIsAvif) {
6270 err = mSampleTable->getMetaDataForSample(mCurrentSampleIndex, &offset, &size,
6271 (uint64_t*)&cts, &isSyncSample, &stts);
6272 if(err == OK) {
6273 if (mElstInitialEmptyEditTicks > 0) {
6274 cts += mElstInitialEmptyEditTicks;
6275 }
6276 if (mElstShiftStartTicks > 0) {
6277 // cts can be negative. for example, initial audio samples for gapless playback.
6278 cts -= (int64_t)mElstShiftStartTicks;
6279 }
6280 }
6281 } else {
6282 err = mItemTable->getImageOffsetAndSize(
6283 options && options->getSeekTo(&seekTimeUs, &mode) ?
6284 &mCurrentSampleIndex : NULL, &offset, &size);
6285
6286 cts = stts = 0;
6287 isSyncSample = 0;
6288 ALOGV("image offset %lld, size %zu", (long long)offset, size);
6289 }
6290
6291 if (err != OK) {
6292 if (err == ERROR_END_OF_STREAM) {
6293 return AMEDIA_ERROR_END_OF_STREAM;
6294 }
6295 return AMEDIA_ERROR_UNKNOWN;
6296 }
6297
6298 err = mBufferGroup->acquire_buffer(&mBuffer);
6299
6300 if (err != OK) {
6301 CHECK(mBuffer == NULL);
6302 return AMEDIA_ERROR_UNKNOWN;
6303 }
6304 if (size > mBuffer->size()) {
6305 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
6306 mBuffer->release();
6307 mBuffer = NULL;
6308 return AMEDIA_ERROR_UNKNOWN; // ERROR_BUFFER_TOO_SMALL
6309 }
6310 }
6311
6312 if (!mIsAVC && !mIsHEVC && !(mIsDolbyVision && mNALLengthSize) && !mIsAC4) {
6313 if (newBuffer) {
6314 if (mIsPcm) {
6315 // The twos' PCM block reader assumes that all samples has the same size.
6316 uint32_t lastSampleIndexInChunk = mSampleTable->getLastSampleIndexInChunk();
6317 if (lastSampleIndexInChunk < mCurrentSampleIndex) {
6318 mBuffer->release();
6319 mBuffer = nullptr;
6320 return AMEDIA_ERROR_UNKNOWN;
6321 }
6322 uint32_t samplesToRead = lastSampleIndexInChunk - mCurrentSampleIndex + 1;
6323 if (samplesToRead > kMaxPcmFrameSize) {
6324 samplesToRead = kMaxPcmFrameSize;
6325 }
6326
6327 ALOGV("Reading %d PCM frames of size %zu at index %d to stop of chunk at %d",
6328 samplesToRead, size, mCurrentSampleIndex,
6329 mSampleTable->getLastSampleIndexInChunk());
6330
6331 size_t totalSize = samplesToRead * size;
6332 if (mBuffer->size() < totalSize) {
6333 mBuffer->release();
6334 mBuffer = nullptr;
6335 return AMEDIA_ERROR_UNKNOWN;
6336 }
6337 uint8_t* buf = (uint8_t *)mBuffer->data();
6338 ssize_t bytesRead = mDataSource->readAt(offset, buf, totalSize);
6339 if (bytesRead < (ssize_t)totalSize) {
6340 mBuffer->release();
6341 mBuffer = NULL;
6342 return AMEDIA_ERROR_IO;
6343 }
6344
6345 AMediaFormat *meta = mBuffer->meta_data();
6346 AMediaFormat_clear(meta);
6347 AMediaFormat_setInt64(
6348 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6349 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6350
6351 int32_t byteOrder = 0;
6352 bool isGetBigEndian = AMediaFormat_getInt32(mFormat,
6353 AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, &byteOrder);
6354
6355 if (isGetBigEndian && byteOrder == 1) {
6356 // Big-endian -> little-endian
6357 uint16_t *dstData = (uint16_t *)buf;
6358 uint16_t *srcData = (uint16_t *)buf;
6359
6360 for (size_t j = 0; j < bytesRead / sizeof(uint16_t); j++) {
6361 dstData[j] = ntohs(srcData[j]);
6362 }
6363 }
6364
6365 mCurrentSampleIndex += samplesToRead;
6366 mBuffer->set_range(0, totalSize);
6367 } else {
6368 ssize_t num_bytes_read =
6369 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
6370
6371 if (num_bytes_read < (ssize_t)size) {
6372 mBuffer->release();
6373 mBuffer = NULL;
6374
6375 return AMEDIA_ERROR_IO;
6376 }
6377
6378 CHECK(mBuffer != NULL);
6379 mBuffer->set_range(0, size);
6380 AMediaFormat *meta = mBuffer->meta_data();
6381 AMediaFormat_clear(meta);
6382 AMediaFormat_setInt64(
6383 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6384 AMediaFormat_setInt64(
6385 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6386
6387 if (targetSampleTimeUs >= 0) {
6388 AMediaFormat_setInt64(
6389 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6390 }
6391
6392 if (isSyncSample) {
6393 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6394 }
6395
6396 AMediaFormat_setInt64(
6397 meta, "sample-file-offset" /*AMEDIAFORMAT_KEY_SAMPLE_FILE_OFFSET*/,
6398 offset);
6399
6400 if (mSampleTable != nullptr &&
6401 mCurrentSampleIndex == mSampleTable->getLastSampleIndexInChunk()) {
6402 AMediaFormat_setInt64(
6403 meta,
6404 "last-sample-index-in-chunk" /*AMEDIAFORMAT_KEY_LAST_SAMPLE_INDEX_IN_CHUNK*/,
6405 mSampleTable->getLastSampleIndexInChunk());
6406 }
6407
6408 ++mCurrentSampleIndex;
6409 }
6410 }
6411
6412 *out = mBuffer;
6413 mBuffer = NULL;
6414
6415 return AMEDIA_OK;
6416
6417 } else if (mIsAC4) {
6418 CHECK(mBuffer != NULL);
6419 // Make sure there is enough space to write the sync header and the raw frame
6420 if (mBuffer->range_length() < (7 + size)) {
6421 mBuffer->release();
6422 mBuffer = NULL;
6423
6424 return AMEDIA_ERROR_IO;
6425 }
6426
6427 uint8_t *dstData = (uint8_t *)mBuffer->data();
6428 size_t dstOffset = 0;
6429 // Add AC-4 sync header to MPEG4 encapsulated AC-4 raw frame
6430 // AC40 sync word, meaning no CRC at the end of the frame
6431 dstData[dstOffset++] = 0xAC;
6432 dstData[dstOffset++] = 0x40;
6433 dstData[dstOffset++] = 0xFF;
6434 dstData[dstOffset++] = 0xFF;
6435 dstData[dstOffset++] = (uint8_t)((size >> 16) & 0xFF);
6436 dstData[dstOffset++] = (uint8_t)((size >> 8) & 0xFF);
6437 dstData[dstOffset++] = (uint8_t)((size >> 0) & 0xFF);
6438
6439 ssize_t numBytesRead = mDataSource->readAt(offset, dstData + dstOffset, size);
6440 if (numBytesRead != (ssize_t)size) {
6441 mBuffer->release();
6442 mBuffer = NULL;
6443
6444 return AMEDIA_ERROR_IO;
6445 }
6446
6447 mBuffer->set_range(0, dstOffset + size);
6448 AMediaFormat *meta = mBuffer->meta_data();
6449 AMediaFormat_clear(meta);
6450 AMediaFormat_setInt64(
6451 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6452 AMediaFormat_setInt64(
6453 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6454
6455 if (targetSampleTimeUs >= 0) {
6456 AMediaFormat_setInt64(
6457 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6458 }
6459
6460 if (isSyncSample) {
6461 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6462 }
6463
6464 ++mCurrentSampleIndex;
6465
6466 *out = mBuffer;
6467 mBuffer = NULL;
6468
6469 return AMEDIA_OK;
6470 } else {
6471 // Whole NAL units are returned but each fragment is prefixed by
6472 // the start code (0x00 00 00 01).
6473 ssize_t num_bytes_read = 0;
6474 bool mSrcBufferFitsDataToRead = size <= mSrcBufferSize;
6475 if (mSrcBufferFitsDataToRead) {
6476 num_bytes_read = mDataSource->readAt(offset, mSrcBuffer, size);
6477 } else {
6478 // We are trying to read a sample larger than the expected max sample size.
6479 // Fall through and let the failure be handled by the following if.
6480 android_errorWriteLog(0x534e4554, "188893559");
6481 }
6482
6483 if (num_bytes_read < (ssize_t)size) {
6484 mBuffer->release();
6485 mBuffer = NULL;
6486 return mSrcBufferFitsDataToRead ? AMEDIA_ERROR_IO : AMEDIA_ERROR_MALFORMED;
6487 }
6488
6489 uint8_t *dstData = (uint8_t *)mBuffer->data();
6490 size_t srcOffset = 0;
6491 size_t dstOffset = 0;
6492
6493 while (srcOffset < size) {
6494 bool isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
6495 size_t nalLength = 0;
6496 if (!isMalFormed) {
6497 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
6498 srcOffset += mNALLengthSize;
6499 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength);
6500 }
6501
6502 if (isMalFormed) {
6503 //if nallength abnormal,ignore it.
6504 ALOGW("abnormal nallength, ignore this NAL");
6505 srcOffset = size;
6506 break;
6507 }
6508
6509 if (nalLength == 0) {
6510 continue;
6511 }
6512
6513 if (dstOffset > SIZE_MAX - 4 ||
6514 dstOffset + 4 > SIZE_MAX - nalLength ||
6515 dstOffset + 4 + nalLength > mBuffer->size()) {
6516 ALOGE("b/27208621 : %zu %zu", dstOffset, mBuffer->size());
6517 android_errorWriteLog(0x534e4554, "27208621");
6518 mBuffer->release();
6519 mBuffer = NULL;
6520 return AMEDIA_ERROR_MALFORMED;
6521 }
6522
6523 dstData[dstOffset++] = 0;
6524 dstData[dstOffset++] = 0;
6525 dstData[dstOffset++] = 0;
6526 dstData[dstOffset++] = 1;
6527 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
6528 srcOffset += nalLength;
6529 dstOffset += nalLength;
6530 }
6531 CHECK_EQ(srcOffset, size);
6532 CHECK(mBuffer != NULL);
6533 mBuffer->set_range(0, dstOffset);
6534
6535 AMediaFormat *meta = mBuffer->meta_data();
6536 AMediaFormat_clear(meta);
6537 AMediaFormat_setInt64(
6538 meta, AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6539 AMediaFormat_setInt64(
6540 meta, AMEDIAFORMAT_KEY_DURATION, ((long double)stts * 1000000) / mTimescale);
6541
6542 if (targetSampleTimeUs >= 0) {
6543 AMediaFormat_setInt64(
6544 meta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6545 }
6546
6547 if (mIsAVC) {
6548 uint32_t layerId = FindAVCLayerId(
6549 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6550 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6551 } else if (mIsHEVC) {
6552 int32_t layerId = parseHEVCLayerId(
6553 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6554 if (layerId >= 0) {
6555 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6556 }
6557 }
6558
6559 if (isSyncSample) {
6560 AMediaFormat_setInt32(meta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6561 }
6562
6563 AMediaFormat_setInt64(
6564 meta, "sample-file-offset" /*AMEDIAFORMAT_KEY_SAMPLE_FILE_OFFSET*/, offset);
6565
6566 if (mSampleTable != nullptr &&
6567 mCurrentSampleIndex == mSampleTable->getLastSampleIndexInChunk()) {
6568 AMediaFormat_setInt64(
6569 meta,
6570 "last-sample-index-in-chunk" /*AMEDIAFORMAT_KEY_LAST_SAMPLE_INDEX_IN_CHUNK*/,
6571 mSampleTable->getLastSampleIndexInChunk());
6572 }
6573
6574 ++mCurrentSampleIndex;
6575
6576 *out = mBuffer;
6577 mBuffer = NULL;
6578
6579 return AMEDIA_OK;
6580 }
6581 }
6582
fragmentedRead(MediaBufferHelper ** out,const ReadOptions * options)6583 media_status_t MPEG4Source::fragmentedRead(
6584 MediaBufferHelper **out, const ReadOptions *options) {
6585
6586 ALOGV("MPEG4Source::fragmentedRead");
6587
6588 CHECK(mStarted);
6589
6590 *out = NULL;
6591
6592 int64_t targetSampleTimeUs = -1;
6593
6594 int64_t seekTimeUs;
6595 ReadOptions::SeekMode mode;
6596 if (options && options->getSeekTo(&seekTimeUs, &mode)) {
6597 ALOGV("seekTimeUs:%" PRId64, seekTimeUs);
6598 int64_t elstInitialEmptyEditUs = 0, elstShiftStartUs = 0;
6599 if (mElstInitialEmptyEditTicks > 0) {
6600 elstInitialEmptyEditUs = ((long double)mElstInitialEmptyEditTicks * 1000000) /
6601 mTimescale;
6602 /* Sample's composition time from ctts/stts entries are non-negative(>=0).
6603 * Hence, lower bound on seekTimeUs is 0.
6604 */
6605 seekTimeUs = std::max(seekTimeUs - elstInitialEmptyEditUs, (int64_t)0);
6606 }
6607 if (mElstShiftStartTicks > 0){
6608 elstShiftStartUs = ((long double)mElstShiftStartTicks * 1000000) / mTimescale;
6609 seekTimeUs += elstShiftStartUs;
6610 }
6611 ALOGV("shifted seekTimeUs:%" PRId64 ", elstInitialEmptyEditUs:%" PRIu64
6612 ", elstShiftStartUs:%" PRIu64, seekTimeUs, elstInitialEmptyEditUs,
6613 elstShiftStartUs);
6614
6615 int numSidxEntries = mSegments.size();
6616 if (numSidxEntries != 0) {
6617 int64_t totalTime = 0;
6618 off64_t totalOffset = mFirstMoofOffset;
6619 for (int i = 0; i < numSidxEntries; i++) {
6620 const SidxEntry *se = &mSegments[i];
6621 if (totalTime + se->mDurationUs > seekTimeUs) {
6622 // The requested time is somewhere in this segment
6623 if ((mode == ReadOptions::SEEK_NEXT_SYNC && seekTimeUs > totalTime) ||
6624 (mode == ReadOptions::SEEK_CLOSEST_SYNC &&
6625 (seekTimeUs - totalTime) > (totalTime + se->mDurationUs - seekTimeUs))) {
6626 // requested next sync, or closest sync and it was closer to the end of
6627 // this segment
6628 totalTime += se->mDurationUs;
6629 totalOffset += se->mSize;
6630 }
6631 break;
6632 }
6633 totalTime += se->mDurationUs;
6634 totalOffset += se->mSize;
6635 }
6636 mCurrentMoofOffset = totalOffset;
6637 mNextMoofOffset = -1;
6638 mCurrentSamples.clear();
6639 mCurrentSampleIndex = 0;
6640 status_t err = parseChunk(&totalOffset);
6641 if (err != OK) {
6642 return AMEDIA_ERROR_UNKNOWN;
6643 }
6644 mCurrentTime = totalTime * mTimescale / 1000000ll;
6645 } else {
6646 // without sidx boxes, we can only seek to 0
6647 mCurrentMoofOffset = mFirstMoofOffset;
6648 mNextMoofOffset = -1;
6649 mCurrentSamples.clear();
6650 mCurrentSampleIndex = 0;
6651 off64_t tmp = mCurrentMoofOffset;
6652 status_t err = parseChunk(&tmp);
6653 if (err != OK) {
6654 return AMEDIA_ERROR_UNKNOWN;
6655 }
6656 mCurrentTime = 0;
6657 }
6658
6659 if (mBuffer != NULL) {
6660 mBuffer->release();
6661 mBuffer = NULL;
6662 }
6663
6664 // fall through
6665 }
6666
6667 off64_t offset = 0;
6668 size_t size = 0;
6669 int64_t cts = 0;
6670 bool isSyncSample = false;
6671 bool newBuffer = false;
6672 if (mBuffer == NULL || mCurrentSampleIndex >= mCurrentSamples.size()) {
6673 newBuffer = true;
6674
6675 if (mBuffer != NULL) {
6676 mBuffer->release();
6677 mBuffer = NULL;
6678 }
6679 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6680 // move to next fragment if there is one
6681 if (mNextMoofOffset <= mCurrentMoofOffset) {
6682 return AMEDIA_ERROR_END_OF_STREAM;
6683 }
6684 off64_t nextMoof = mNextMoofOffset;
6685 mCurrentMoofOffset = nextMoof;
6686 mCurrentSamples.clear();
6687 mCurrentSampleIndex = 0;
6688 status_t err = parseChunk(&nextMoof);
6689 if (err != OK) {
6690 return AMEDIA_ERROR_UNKNOWN;
6691 }
6692 if (mCurrentSampleIndex >= mCurrentSamples.size()) {
6693 return AMEDIA_ERROR_END_OF_STREAM;
6694 }
6695 }
6696
6697 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6698 offset = smpl->offset;
6699 size = smpl->size;
6700 cts = mCurrentTime + smpl->compositionOffset;
6701
6702 if (mElstInitialEmptyEditTicks > 0) {
6703 cts += mElstInitialEmptyEditTicks;
6704 }
6705 if (mElstShiftStartTicks > 0) {
6706 // cts can be negative. for example, initial audio samples for gapless playback.
6707 cts -= (int64_t)mElstShiftStartTicks;
6708 }
6709
6710 mCurrentTime += smpl->duration;
6711 isSyncSample = (mCurrentSampleIndex == 0);
6712
6713 status_t err = mBufferGroup->acquire_buffer(&mBuffer);
6714
6715 if (err != OK) {
6716 CHECK(mBuffer == NULL);
6717 ALOGV("acquire_buffer returned %d", err);
6718 return AMEDIA_ERROR_UNKNOWN;
6719 }
6720 if (size > mBuffer->size()) {
6721 ALOGE("buffer too small: %zu > %zu", size, mBuffer->size());
6722 mBuffer->release();
6723 mBuffer = NULL;
6724 return AMEDIA_ERROR_UNKNOWN;
6725 }
6726 }
6727
6728 const Sample *smpl = &mCurrentSamples[mCurrentSampleIndex];
6729 AMediaFormat *bufmeta = mBuffer->meta_data();
6730 AMediaFormat_clear(bufmeta);
6731 if (smpl->encryptedsizes.size()) {
6732 // store clear/encrypted lengths in metadata
6733 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_PLAIN_SIZES,
6734 smpl->clearsizes.array(), smpl->clearsizes.size() * sizeof(uint32_t));
6735 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_SIZES,
6736 smpl->encryptedsizes.array(), smpl->encryptedsizes.size() * sizeof(uint32_t));
6737 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, mDefaultIVSize);
6738 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mCryptoMode);
6739 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_KEY, mCryptoKey, 16);
6740 AMediaFormat_setInt32(bufmeta,
6741 AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, mDefaultEncryptedByteBlock);
6742 AMediaFormat_setInt32(bufmeta,
6743 AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, mDefaultSkipByteBlock);
6744
6745 void *iv = NULL;
6746 size_t ivlength = 0;
6747 if (!AMediaFormat_getBuffer(mFormat,
6748 "crypto-iv", &iv, &ivlength)) {
6749 iv = (void *) smpl->iv;
6750 ivlength = 16; // use 16 or the actual size?
6751 }
6752 AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_IV, iv, ivlength);
6753 }
6754
6755 if (!mIsAVC && !mIsHEVC && !(mIsDolbyVision && mNALLengthSize)) {
6756 if (newBuffer) {
6757 if (!isInRange((size_t)0u, mBuffer->size(), size)) {
6758 mBuffer->release();
6759 mBuffer = NULL;
6760
6761 ALOGE("fragmentedRead ERROR_MALFORMED size %zu", size);
6762 return AMEDIA_ERROR_MALFORMED;
6763 }
6764
6765 ssize_t num_bytes_read =
6766 mDataSource->readAt(offset, (uint8_t *)mBuffer->data(), size);
6767
6768 if (num_bytes_read < (ssize_t)size) {
6769 mBuffer->release();
6770 mBuffer = NULL;
6771
6772 ALOGE("i/o error");
6773 return AMEDIA_ERROR_IO;
6774 }
6775
6776 CHECK(mBuffer != NULL);
6777 mBuffer->set_range(0, size);
6778 AMediaFormat_setInt64(bufmeta,
6779 AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6780 AMediaFormat_setInt64(bufmeta,
6781 AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
6782
6783 if (targetSampleTimeUs >= 0) {
6784 AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6785 }
6786
6787 if (mIsAVC) {
6788 uint32_t layerId = FindAVCLayerId(
6789 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6790 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6791 } else if (mIsHEVC) {
6792 int32_t layerId = parseHEVCLayerId(
6793 (const uint8_t *)mBuffer->data(), mBuffer->range_length());
6794 if (layerId >= 0) {
6795 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_TEMPORAL_LAYER_ID, layerId);
6796 }
6797 }
6798
6799 if (isSyncSample) {
6800 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6801 }
6802
6803 ++mCurrentSampleIndex;
6804 }
6805
6806 *out = mBuffer;
6807 mBuffer = NULL;
6808
6809 return AMEDIA_OK;
6810
6811 } else {
6812 ALOGV("whole NAL");
6813 // Whole NAL units are returned but each fragment is prefixed by
6814 // the start code (0x00 00 00 01).
6815 ssize_t num_bytes_read = 0;
6816 void *data = NULL;
6817 bool isMalFormed = false;
6818 int32_t max_size;
6819 if (!AMediaFormat_getInt32(mFormat, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, &max_size)
6820 || !isInRange((size_t)0u, (size_t)max_size, size)) {
6821 isMalFormed = true;
6822 } else {
6823 data = mSrcBuffer;
6824 }
6825
6826 if (isMalFormed || data == NULL) {
6827 ALOGE("isMalFormed size %zu", size);
6828 if (mBuffer != NULL) {
6829 mBuffer->release();
6830 mBuffer = NULL;
6831 }
6832 return AMEDIA_ERROR_MALFORMED;
6833 }
6834 num_bytes_read = mDataSource->readAt(offset, data, size);
6835
6836 if (num_bytes_read < (ssize_t)size) {
6837 mBuffer->release();
6838 mBuffer = NULL;
6839
6840 ALOGE("i/o error");
6841 return AMEDIA_ERROR_IO;
6842 }
6843
6844 uint8_t *dstData = (uint8_t *)mBuffer->data();
6845 size_t srcOffset = 0;
6846 size_t dstOffset = 0;
6847
6848 while (srcOffset < size) {
6849 isMalFormed = !isInRange((size_t)0u, size, srcOffset, mNALLengthSize);
6850 size_t nalLength = 0;
6851 if (!isMalFormed) {
6852 nalLength = parseNALSize(&mSrcBuffer[srcOffset]);
6853 srcOffset += mNALLengthSize;
6854 isMalFormed = !isInRange((size_t)0u, size, srcOffset, nalLength)
6855 || !isInRange((size_t)0u, mBuffer->size(), dstOffset, (size_t)4u)
6856 || !isInRange((size_t)0u, mBuffer->size(), dstOffset + 4, nalLength);
6857 }
6858
6859 if (isMalFormed) {
6860 ALOGE("Video is malformed; nalLength %zu", nalLength);
6861 mBuffer->release();
6862 mBuffer = NULL;
6863 return AMEDIA_ERROR_MALFORMED;
6864 }
6865
6866 if (nalLength == 0) {
6867 continue;
6868 }
6869
6870 if (dstOffset > SIZE_MAX - 4 ||
6871 dstOffset + 4 > SIZE_MAX - nalLength ||
6872 dstOffset + 4 + nalLength > mBuffer->size()) {
6873 ALOGE("b/26365349 : %zu %zu", dstOffset, mBuffer->size());
6874 android_errorWriteLog(0x534e4554, "26365349");
6875 mBuffer->release();
6876 mBuffer = NULL;
6877 return AMEDIA_ERROR_MALFORMED;
6878 }
6879
6880 dstData[dstOffset++] = 0;
6881 dstData[dstOffset++] = 0;
6882 dstData[dstOffset++] = 0;
6883 dstData[dstOffset++] = 1;
6884 memcpy(&dstData[dstOffset], &mSrcBuffer[srcOffset], nalLength);
6885 srcOffset += nalLength;
6886 dstOffset += nalLength;
6887 }
6888 CHECK_EQ(srcOffset, size);
6889 CHECK(mBuffer != NULL);
6890 mBuffer->set_range(0, dstOffset);
6891
6892 AMediaFormat *bufmeta = mBuffer->meta_data();
6893 AMediaFormat_setInt64(bufmeta,
6894 AMEDIAFORMAT_KEY_TIME_US, ((long double)cts * 1000000) / mTimescale);
6895 AMediaFormat_setInt64(bufmeta,
6896 AMEDIAFORMAT_KEY_DURATION, ((long double)smpl->duration * 1000000) / mTimescale);
6897
6898 if (targetSampleTimeUs >= 0) {
6899 AMediaFormat_setInt64(bufmeta, AMEDIAFORMAT_KEY_TARGET_TIME, targetSampleTimeUs);
6900 }
6901
6902 if (isSyncSample) {
6903 AMediaFormat_setInt32(bufmeta, AMEDIAFORMAT_KEY_IS_SYNC_FRAME, 1);
6904 }
6905
6906 ++mCurrentSampleIndex;
6907
6908 *out = mBuffer;
6909 mBuffer = NULL;
6910
6911 return AMEDIA_OK;
6912 }
6913
6914 return AMEDIA_OK;
6915 }
6916
findTrackByMimePrefix(const char * mimePrefix)6917 MPEG4Extractor::Track *MPEG4Extractor::findTrackByMimePrefix(
6918 const char *mimePrefix) {
6919 for (Track *track = mFirstTrack; track != NULL; track = track->next) {
6920 const char *mime;
6921 if (AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime)
6922 && !strncasecmp(mime, mimePrefix, strlen(mimePrefix))) {
6923 return track;
6924 }
6925 }
6926
6927 return NULL;
6928 }
6929
LegacySniffMPEG4(DataSourceHelper * source,float * confidence)6930 static bool LegacySniffMPEG4(DataSourceHelper *source, float *confidence) {
6931 uint8_t header[8];
6932
6933 ssize_t n = source->readAt(4, header, sizeof(header));
6934 if (n < (ssize_t)sizeof(header)) {
6935 return false;
6936 }
6937
6938 if (!memcmp(header, "ftyp3gp", 7) || !memcmp(header, "ftypmp42", 8)
6939 || !memcmp(header, "ftyp3gr6", 8) || !memcmp(header, "ftyp3gs6", 8)
6940 || !memcmp(header, "ftyp3ge6", 8) || !memcmp(header, "ftyp3gg6", 8)
6941 || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
6942 || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
6943 || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)
6944 || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)
6945 || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)
6946 || !memcmp(header, "ftypavif", 8) || !memcmp(header, "ftypavis", 8)) {
6947 *confidence = 0.4;
6948
6949 return true;
6950 }
6951
6952 return false;
6953 }
6954
isCompatibleBrand(uint32_t fourcc)6955 static bool isCompatibleBrand(uint32_t fourcc) {
6956 static const uint32_t kCompatibleBrands[] = {
6957 FOURCC("isom"),
6958 FOURCC("iso2"),
6959 FOURCC("avc1"),
6960 FOURCC("hvc1"),
6961 FOURCC("hev1"),
6962 FOURCC("av01"),
6963 FOURCC("vp09"),
6964 FOURCC("3gp4"),
6965 FOURCC("mp41"),
6966 FOURCC("mp42"),
6967 FOURCC("dash"),
6968 FOURCC("nvr1"),
6969
6970 // Won't promise that the following file types can be played.
6971 // Just give these file types a chance.
6972 FOURCC("qt "), // Apple's QuickTime
6973 FOURCC("MSNV"), // Sony's PSP
6974 FOURCC("wmf "),
6975
6976 FOURCC("3g2a"), // 3GPP2
6977 FOURCC("3g2b"),
6978 FOURCC("mif1"), // HEIF image
6979 FOURCC("heic"), // HEIF image
6980 FOURCC("msf1"), // HEIF image sequence
6981 FOURCC("hevc"), // HEIF image sequence
6982 FOURCC("avif"), // AVIF image
6983 FOURCC("avis"), // AVIF image sequence
6984 };
6985
6986 for (size_t i = 0;
6987 i < sizeof(kCompatibleBrands) / sizeof(kCompatibleBrands[0]);
6988 ++i) {
6989 if (kCompatibleBrands[i] == fourcc) {
6990 return true;
6991 }
6992 }
6993
6994 return false;
6995 }
6996
6997 // Attempt to actually parse the 'ftyp' atom and determine if a suitable
6998 // compatible brand is present.
6999 // Also try to identify where this file's metadata ends
7000 // (end of the 'moov' atom) and report it to the caller as part of
7001 // the metadata.
BetterSniffMPEG4(DataSourceHelper * source,float * confidence)7002 static bool BetterSniffMPEG4(DataSourceHelper *source, float *confidence) {
7003 // We scan up to 128 bytes to identify this file as an MP4.
7004 static const off64_t kMaxScanOffset = 128ll;
7005
7006 off64_t offset = 0ll;
7007 bool foundGoodFileType = false;
7008 off64_t moovAtomEndOffset = -1ll;
7009 bool done = false;
7010
7011 while (!done && offset < kMaxScanOffset) {
7012 uint32_t hdr[2];
7013 if (source->readAt(offset, hdr, 8) < 8) {
7014 return false;
7015 }
7016
7017 uint64_t chunkSize = ntohl(hdr[0]);
7018 uint32_t chunkType = ntohl(hdr[1]);
7019 off64_t chunkDataOffset = offset + 8;
7020
7021 if (chunkSize == 1) {
7022 if (source->readAt(offset + 8, &chunkSize, 8) < 8) {
7023 return false;
7024 }
7025
7026 chunkSize = ntoh64(chunkSize);
7027 chunkDataOffset += 8;
7028
7029 if (chunkSize < 16) {
7030 // The smallest valid chunk is 16 bytes long in this case.
7031 return false;
7032 }
7033 if (chunkSize > INT64_MAX) {
7034 // reject overly large chunk sizes that could
7035 // be interpreted as negative
7036 ALOGE("chunk size too large");
7037 return false;
7038 }
7039
7040 } else if (chunkSize < 8) {
7041 // The smallest valid chunk is 8 bytes long.
7042 return false;
7043 }
7044
7045 // (data_offset - offset) is either 8 or 16
7046 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);
7047 if (chunkDataSize < 0) {
7048 ALOGE("b/23540914");
7049 return false;
7050 }
7051
7052 char chunkstring[5];
7053 MakeFourCCString(chunkType, chunkstring);
7054 ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld",
7055 chunkstring, chunkSize, (long long)offset);
7056 switch (chunkType) {
7057 case FOURCC("ftyp"):
7058 {
7059 if (chunkDataSize < 8) {
7060 return false;
7061 }
7062
7063 uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;
7064 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
7065 if (i == 1) {
7066 // Skip this index, it refers to the minorVersion,
7067 // not a brand.
7068 continue;
7069 }
7070
7071 uint32_t brand;
7072 if (source->readAt(
7073 chunkDataOffset + 4 * i, &brand, 4) < 4) {
7074 return false;
7075 }
7076
7077 brand = ntohl(brand);
7078
7079 if (isCompatibleBrand(brand)) {
7080 foundGoodFileType = true;
7081 break;
7082 }
7083 }
7084
7085 if (!foundGoodFileType) {
7086 return false;
7087 }
7088
7089 break;
7090 }
7091
7092 case FOURCC("moov"):
7093 {
7094 if (__builtin_add_overflow(offset, chunkSize, &moovAtomEndOffset)) {
7095 ALOGE("chunk size + offset would overflow");
7096 return false;
7097 }
7098
7099 done = true;
7100 break;
7101 }
7102
7103 default:
7104 break;
7105 }
7106
7107 if (__builtin_add_overflow(offset, chunkSize, &offset)) {
7108 ALOGE("chunk size + offset would overflow");
7109 return false;
7110 }
7111 }
7112
7113 if (!foundGoodFileType) {
7114 return false;
7115 }
7116
7117 *confidence = 0.4f;
7118
7119 return true;
7120 }
7121
CreateExtractor(CDataSource * source,void *)7122 static CMediaExtractor* CreateExtractor(CDataSource *source, void *) {
7123 return wrap(new MPEG4Extractor(new DataSourceHelper(source)));
7124 }
7125
Sniff(CDataSource * source,float * confidence,void **,FreeMetaFunc *)7126 static CreatorFunc Sniff(
7127 CDataSource *source, float *confidence, void **,
7128 FreeMetaFunc *) {
7129 DataSourceHelper helper(source);
7130 if (BetterSniffMPEG4(&helper, confidence)) {
7131 return CreateExtractor;
7132 }
7133
7134 if (LegacySniffMPEG4(&helper, confidence)) {
7135 ALOGW("Identified supported mpeg4 through LegacySniffMPEG4.");
7136 return CreateExtractor;
7137 }
7138
7139 return NULL;
7140 }
7141
// NULL-terminated list of file extensions placed in the ExtractorDef returned
// by GETEXTRACTORDEF().
static const char *extensions[] = {
    "3g2", "3ga", "3gp", "3gpp", "3gpp2",
    "m4a", "m4r", "m4v",
    "mov", "mp4", "qt",
    NULL
};
7156
7157 extern "C" {
7158 // This is the only symbol that needs to be exported
7159 __attribute__ ((visibility ("default")))
GETEXTRACTORDEF()7160 ExtractorDef GETEXTRACTORDEF() {
7161 return {
7162 EXTRACTORDEF_VERSION,
7163 UUID("27575c67-4417-4c54-8d3d-8e626985a164"),
7164 2, // version
7165 "MP4 Extractor",
7166 { .v3 = {Sniff, extensions} },
7167 };
7168 }
7169
7170 } // extern "C"
7171
7172 } // namespace android
7173