XenevaOS
Loading...
Searching...
No Matches
minimp4.h
Go to the documentation of this file.
1#ifndef MINIMP4_H
2#define MINIMP4_H
3/*
4 https://github.com/aspt/mp4
5 https://github.com/lieff/minimp4
6 To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide.
7 This software is distributed without any warranty.
8 See <http://creativecommons.org/publicdomain/zero/1.0/>.
9*/
10
11#include <stdio.h>
12#include <stdint.h>
13#include <stdlib.h>
14#include <string.h>
15#include <limits.h>
16#include <assert.h>
17
18#ifdef __cplusplus
19extern "C" {
20#endif
21
22#define MINIMP4_MIN(x, y) ((x) < (y) ? (x) : (y))
23
24 /************************************************************************/
25 /* Build configuration */
26 /************************************************************************/
27
28#define FIX_BAD_ANDROID_META_BOX 1
29
30#define MAX_CHUNKS_DEPTH 64 // Max chunks nesting level
31
32#define MINIMP4_MAX_SPS 32
33#define MINIMP4_MAX_PPS 256
34
35#define MINIMP4_TRANSCODE_SPS_ID 1
36
37// Support indexing of MP4 files over 4 GB.
38// If disabled, files with 64-bit offset fields are still supported,
39// but an error is signaled if such a field contains an offset that is too big.
40// This switch affects the return type of the MP4D_frame_offset() function
41#define MINIMP4_ALLOW_64BIT 1
42
43#define MP4D_TRACE_SUPPORTED 0 // Debug trace
44#define MP4D_TRACE_TIMESTAMPS 1
45// Support parsing of supplementary information, not necessary for decoding:
46// duration, language, bitrate, metadata tags, etc
47#define MP4D_INFO_SUPPORTED 1
48
49// Enable code, which prints to stdout supplementary MP4 information:
50#define MP4D_PRINT_INFO_SUPPORTED 0
51
52#define MP4D_AVC_SUPPORTED 1
53#define MP4D_HEVC_SUPPORTED 1
54#define MP4D_TIMESTAMPS_SUPPORTED 1
55
56// Enable TrackFragmentBaseMediaDecodeTimeBox support
57#define MP4D_TFDT_SUPPORT 0
58
59/************************************************************************/
60/* Some values of MP4(E/D)_track_t->object_type_indication */
61/************************************************************************/
62// MPEG-4 AAC (all profiles)
63#define MP4_OBJECT_TYPE_AUDIO_ISO_IEC_14496_3 0x40
64// MPEG-2 AAC, Main profile
65#define MP4_OBJECT_TYPE_AUDIO_ISO_IEC_13818_7_MAIN_PROFILE 0x66
66// MPEG-2 AAC, LC profile
67#define MP4_OBJECT_TYPE_AUDIO_ISO_IEC_13818_7_LC_PROFILE 0x67
68// MPEG-2 AAC, SSR profile
69#define MP4_OBJECT_TYPE_AUDIO_ISO_IEC_13818_7_SSR_PROFILE 0x68
70// H.264 (AVC) video
71#define MP4_OBJECT_TYPE_AVC 0x21
72// H.265 (HEVC) video
73#define MP4_OBJECT_TYPE_HEVC 0x23
74// http://www.mp4ra.org/object.html 0xC0-E0 && 0xE2 - 0xFE are specified as "user private"
75#define MP4_OBJECT_TYPE_USER_PRIVATE 0xC0
76
77/************************************************************************/
78/* API error codes */
79/************************************************************************/
80#define MP4E_STATUS_OK 0
81#define MP4E_STATUS_BAD_ARGUMENTS -1
82#define MP4E_STATUS_NO_MEMORY -2
83#define MP4E_STATUS_FILE_WRITE_ERROR -3
84#define MP4E_STATUS_ONLY_ONE_DSI_ALLOWED -4
85
86/************************************************************************/
87/* Sample kind for MP4E_put_sample() */
88/************************************************************************/
89#define MP4E_SAMPLE_DEFAULT 0 // (beginning of) audio or video frame
90#define MP4E_SAMPLE_RANDOM_ACCESS 1 // mark sample as random access point (key frame)
91#define MP4E_SAMPLE_CONTINUATION 2 // Not a sample, but continuation of previous sample (new slice)
92
93/************************************************************************/
94/* Portable 64-bit type definition */
95/************************************************************************/
96
97#if MINIMP4_ALLOW_64BIT
99#else
100 typedef unsigned int boxsize_t;
101#endif
103
104 /************************************************************************/
105 /* Some values of MP4D_track_t->handler_type */
106 /************************************************************************/
107 // Video track : 'vide'
108#define MP4D_HANDLER_TYPE_VIDE 0x76696465
109// Audio track : 'soun'
110#define MP4D_HANDLER_TYPE_SOUN 0x736F756E
111// General MPEG-4 systems streams (without specific handler).
112// Used for private stream, as suggested in http://www.mp4ra.org/handler.html
113#define MP4E_HANDLER_TYPE_GESM 0x6765736D
114
115
116#define HEVC_NAL_VPS 32
117#define HEVC_NAL_SPS 33
118#define HEVC_NAL_PPS 34
119#define HEVC_NAL_BLA_W_LP 16
120#define HEVC_NAL_CRA_NUT 21
121
122/************************************************************************/
123/* Data structures */
124/************************************************************************/
125
126 typedef struct MP4E_mux_tag MP4E_mux_t;
127
134
135 typedef struct
136 {
137 // MP4 object type code, which defines the codec class for the track.
138 // See MP4_OBJECT_TYPE_* values for some codecs
140
141 // Track language: 3-char ISO 639-2T code: "und", "eng", "rus", "jpn" etc...
142 unsigned char language[4];
143
145
146 // 90000 for video, sample rate for audio
147 unsigned time_scale;
149
150 union
151 {
152 struct
153 {
154 // number of channels in the audio track.
155 unsigned channelcount;
156 } a;
157
158 struct
159 {
160 int width;
162 } v;
163 } u;
164
165 } MP4E_track_t;
166
168
169 typedef struct
170 {
171 /************************************************************************/
172 /* mandatory public data */
173 /************************************************************************/
174 // How many 'samples' in the track
175 // The 'sample' is MP4 term, denoting audio or video frame
176 unsigned sample_count;
177
178 // Decoder-specific info (DSI) data
179 unsigned char* dsi;
180
181 // DSI data size
182 unsigned dsi_bytes;
183
184 // MP4 object type code
185 // case 0x00: return "Forbidden";
186 // case 0x01: return "Systems ISO/IEC 14496-1";
187 // case 0x02: return "Systems ISO/IEC 14496-1";
188 // case 0x20: return "Visual ISO/IEC 14496-2";
189 // case 0x40: return "Audio ISO/IEC 14496-3";
190 // case 0x60: return "Visual ISO/IEC 13818-2 Simple Profile";
191 // case 0x61: return "Visual ISO/IEC 13818-2 Main Profile";
192 // case 0x62: return "Visual ISO/IEC 13818-2 SNR Profile";
193 // case 0x63: return "Visual ISO/IEC 13818-2 Spatial Profile";
194 // case 0x64: return "Visual ISO/IEC 13818-2 High Profile";
195 // case 0x65: return "Visual ISO/IEC 13818-2 422 Profile";
196 // case 0x66: return "Audio ISO/IEC 13818-7 Main Profile";
197 // case 0x67: return "Audio ISO/IEC 13818-7 LC Profile";
198 // case 0x68: return "Audio ISO/IEC 13818-7 SSR Profile";
199 // case 0x69: return "Audio ISO/IEC 13818-3";
200 // case 0x6A: return "Visual ISO/IEC 11172-2";
201 // case 0x6B: return "Audio ISO/IEC 11172-3";
202 // case 0x6C: return "Visual ISO/IEC 10918-1";
204
205#if MP4D_INFO_SUPPORTED
206 /************************************************************************/
207 /* informational public data */
208 /************************************************************************/
209 // handler_type when present in a media box, is an integer containing one of
210 // the following values, or a value from a derived specification:
211 // 'vide' Video track
212 // 'soun' Audio track
213 // 'hint' Hint track
214 unsigned handler_type;
215
216 // Track duration: 64-bit value split into 2 variables
217 unsigned duration_hi;
218 unsigned duration_lo;
219
220 // duration scale: duration = timescale*seconds
221 unsigned timescale;
222
223 // Average bitrate, bits per second
225
226 // Track language: 3-char ISO 639-2T code: "und", "eng", "rus", "jpn" etc...
227 unsigned char language[4];
228
229 // MP4 stream type
230 // case 0x00: return "Forbidden";
231 // case 0x01: return "ObjectDescriptorStream";
232 // case 0x02: return "ClockReferenceStream";
233 // case 0x03: return "SceneDescriptionStream";
234 // case 0x04: return "VisualStream";
235 // case 0x05: return "AudioStream";
236 // case 0x06: return "MPEG7Stream";
237 // case 0x07: return "IPMPStream";
238 // case 0x08: return "ObjectContentInfoStream";
239 // case 0x09: return "MPEGJStream";
240 unsigned stream_type;
241
242 union
243 {
244 // for handler_type == 'soun' tracks
245 struct
246 {
247 unsigned channelcount;
249 } audio;
250
251 // for handler_type == 'vide' tracks
252 struct
253 {
254 unsigned width;
255 unsigned height;
256 } video;
257 } SampleDescription;
258#endif
259
260 /************************************************************************/
261 /* private data: MP4 indexes */
262 /************************************************************************/
263 unsigned* entry_size;
264
267
268 unsigned chunk_count;
270
271#if MP4D_TIMESTAMPS_SUPPORTED
272 unsigned* timestamp;
273 unsigned* duration;
274#endif
275
276 } MP4D_track_t;
277
278 typedef struct MP4D_demux_tag
279 {
280 /************************************************************************/
281 /* mandatory public data */
282 /************************************************************************/
286 int (*read_callback)(int64_t offset, void* buffer, size_t size, void* token);
287 void* token;
288
289 unsigned track_count; // number of tracks in the movie
290
291#if MP4D_INFO_SUPPORTED
292 /************************************************************************/
293 /* informational public data */
294 /************************************************************************/
295 // Movie duration: 64-bit value split into 2 variables
296 unsigned duration_hi;
297 unsigned duration_lo;
298
299 // duration scale: duration = timescale*seconds
300 unsigned timescale;
301
302 // Metadata tag (optional)
303 // Tags provided 'as-is', without any re-encoding
304 struct
305 {
306 unsigned char* title;
307 unsigned char* artist;
308 unsigned char* album;
309 unsigned char* year;
310 unsigned char* comment;
311 unsigned char* genre;
313#endif
314
316
318 {
319 unsigned first_chunk;
321 };
322
323 typedef struct
324 {
325 void* sps_cache[MINIMP4_MAX_SPS];
326 void* pps_cache[MINIMP4_MAX_PPS];
327 int sps_bytes[MINIMP4_MAX_SPS];
328 int pps_bytes[MINIMP4_MAX_PPS];
329
330 int map_sps[MINIMP4_MAX_SPS];
331 int map_pps[MINIMP4_MAX_PPS];
332
334
343
344 int mp4_h26x_write_init(mp4_h26x_writer_t* h, MP4E_mux_t* mux, int width, int height, int is_hevc);
346 int mp4_h26x_write_nal(mp4_h26x_writer_t* h, const unsigned char* nal, int length, unsigned timeStamp90kHz_next);
347
348 /************************************************************************/
349 /* API */
350 /************************************************************************/
351
360 int MP4D_open(MP4D_demux_t* mp4, int (*read_callback)(int64_t offset, void* buffer, size_t size, void* token), void* token, int64_t file_size);
361
372 MP4D_file_offset_t MP4D_frame_offset(const MP4D_demux_t* mp4, unsigned int ntrack, unsigned int nsample, unsigned int* frame_bytes, unsigned* timestamp, unsigned* duration);
373
378
390 const void* MP4D_read_sps(const MP4D_demux_t* mp4, unsigned int ntrack, int nsps, int* sps_bytes);
391 const void* MP4D_read_pps(const MP4D_demux_t* mp4, unsigned int ntrack, int npps, int* pps_bytes);
392
393#if MP4D_PRINT_INFO_SUPPORTED
399 void MP4D_printf_info(const MP4D_demux_t* mp4);
400#endif
401
411 MP4E_mux_t* MP4E_open(int sequential_mode_flag, int enable_fragmentation, void* token,
412 int (*write_callback)(int64_t offset, const void* buffer, size_t size, void* token));
413
422 int MP4E_add_track(MP4E_mux_t* mux, const MP4E_track_t* track_data);
423
434 int MP4E_put_sample(MP4E_mux_t* mux, int track_num, const void* data, int data_bytes, int duration, int kind);
435
445
454 int MP4E_set_dsi(MP4E_mux_t* mux, int track_id, const void* dsi, int bytes);
455
461 int MP4E_set_vps(MP4E_mux_t* mux, int track_id, const void* vps, int bytes);
462
468 int MP4E_set_sps(MP4E_mux_t* mux, int track_id, const void* sps, int bytes);
469
475 int MP4E_set_pps(MP4E_mux_t* mux, int track_id, const void* pps, int bytes);
476
482 int MP4E_set_text_comment(MP4E_mux_t* mux, const char* comment);
483
484#ifdef __cplusplus
485}
486#endif
487#endif //MINIMP4_H
488
489#if defined(MINIMP4_IMPLEMENTATION) && !defined(MINIMP4_IMPLEMENTATION_GUARD)
490#define MINIMP4_IMPLEMENTATION_GUARD
491
492#define FOUR_CHAR_INT(a, b, c, d) (((uint32_t)(a) << 24) | ((b) << 16) | ((c) << 8) | (d))
493enum
494{
495 BOX_co64 = FOUR_CHAR_INT('c', 'o', '6', '4'),//ChunkLargeOffsetAtomType
496 BOX_stco = FOUR_CHAR_INT('s', 't', 'c', 'o'),//ChunkOffsetAtomType
497 BOX_crhd = FOUR_CHAR_INT('c', 'r', 'h', 'd'),//ClockReferenceMediaHeaderAtomType
498 BOX_ctts = FOUR_CHAR_INT('c', 't', 't', 's'),//CompositionOffsetAtomType
499 BOX_cprt = FOUR_CHAR_INT('c', 'p', 'r', 't'),//CopyrightAtomType
500 BOX_url_ = FOUR_CHAR_INT('u', 'r', 'l', ' '),//DataEntryURLAtomType
501 BOX_urn_ = FOUR_CHAR_INT('u', 'r', 'n', ' '),//DataEntryURNAtomType
502 BOX_dinf = FOUR_CHAR_INT('d', 'i', 'n', 'f'),//DataInformationAtomType
503 BOX_dref = FOUR_CHAR_INT('d', 'r', 'e', 'f'),//DataReferenceAtomType
504 BOX_stdp = FOUR_CHAR_INT('s', 't', 'd', 'p'),//DegradationPriorityAtomType
505 BOX_edts = FOUR_CHAR_INT('e', 'd', 't', 's'),//EditAtomType
506 BOX_elst = FOUR_CHAR_INT('e', 'l', 's', 't'),//EditListAtomType
507 BOX_uuid = FOUR_CHAR_INT('u', 'u', 'i', 'd'),//ExtendedAtomType
508 BOX_free = FOUR_CHAR_INT('f', 'r', 'e', 'e'),//FreeSpaceAtomType
509 BOX_hdlr = FOUR_CHAR_INT('h', 'd', 'l', 'r'),//HandlerAtomType
510 BOX_hmhd = FOUR_CHAR_INT('h', 'm', 'h', 'd'),//HintMediaHeaderAtomType
511 BOX_hint = FOUR_CHAR_INT('h', 'i', 'n', 't'),//HintTrackReferenceAtomType
512 BOX_mdia = FOUR_CHAR_INT('m', 'd', 'i', 'a'),//MediaAtomType
513 BOX_mdat = FOUR_CHAR_INT('m', 'd', 'a', 't'),//MediaDataAtomType
514 BOX_mdhd = FOUR_CHAR_INT('m', 'd', 'h', 'd'),//MediaHeaderAtomType
515 BOX_minf = FOUR_CHAR_INT('m', 'i', 'n', 'f'),//MediaInformationAtomType
516 BOX_moov = FOUR_CHAR_INT('m', 'o', 'o', 'v'),//MovieAtomType
517 BOX_mvhd = FOUR_CHAR_INT('m', 'v', 'h', 'd'),//MovieHeaderAtomType
518 BOX_stsd = FOUR_CHAR_INT('s', 't', 's', 'd'),//SampleDescriptionAtomType
519 BOX_stsz = FOUR_CHAR_INT('s', 't', 's', 'z'),//SampleSizeAtomType
520 BOX_stz2 = FOUR_CHAR_INT('s', 't', 'z', '2'),//CompactSampleSizeAtomType
521 BOX_stbl = FOUR_CHAR_INT('s', 't', 'b', 'l'),//SampleTableAtomType
522 BOX_stsc = FOUR_CHAR_INT('s', 't', 's', 'c'),//SampleToChunkAtomType
523 BOX_stsh = FOUR_CHAR_INT('s', 't', 's', 'h'),//ShadowSyncAtomType
524 BOX_skip = FOUR_CHAR_INT('s', 'k', 'i', 'p'),//SkipAtomType
525 BOX_smhd = FOUR_CHAR_INT('s', 'm', 'h', 'd'),//SoundMediaHeaderAtomType
526 BOX_stss = FOUR_CHAR_INT('s', 't', 's', 's'),//SyncSampleAtomType
527 BOX_stts = FOUR_CHAR_INT('s', 't', 't', 's'),//TimeToSampleAtomType
528 BOX_trak = FOUR_CHAR_INT('t', 'r', 'a', 'k'),//TrackAtomType
529 BOX_tkhd = FOUR_CHAR_INT('t', 'k', 'h', 'd'),//TrackHeaderAtomType
530 BOX_tref = FOUR_CHAR_INT('t', 'r', 'e', 'f'),//TrackReferenceAtomType
531 BOX_udta = FOUR_CHAR_INT('u', 'd', 't', 'a'),//UserDataAtomType
532 BOX_vmhd = FOUR_CHAR_INT('v', 'm', 'h', 'd'),//VideoMediaHeaderAtomType
533 BOX_url = FOUR_CHAR_INT('u', 'r', 'l', ' '),
534 BOX_urn = FOUR_CHAR_INT('u', 'r', 'n', ' '),
535
536 BOX_gnrv = FOUR_CHAR_INT('g', 'n', 'r', 'v'),//GenericVisualSampleEntryAtomType
537 BOX_gnra = FOUR_CHAR_INT('g', 'n', 'r', 'a'),//GenericAudioSampleEntryAtomType
538
539 //V2 atoms
540 BOX_ftyp = FOUR_CHAR_INT('f', 't', 'y', 'p'),//FileTypeAtomType
541 BOX_padb = FOUR_CHAR_INT('p', 'a', 'd', 'b'),//PaddingBitsAtomType
542
543 //MP4 Atoms
544 BOX_sdhd = FOUR_CHAR_INT('s', 'd', 'h', 'd'),//SceneDescriptionMediaHeaderAtomType
545 BOX_dpnd = FOUR_CHAR_INT('d', 'p', 'n', 'd'),//StreamDependenceAtomType
546 BOX_iods = FOUR_CHAR_INT('i', 'o', 'd', 's'),//ObjectDescriptorAtomType
547 BOX_odhd = FOUR_CHAR_INT('o', 'd', 'h', 'd'),//ObjectDescriptorMediaHeaderAtomType
548 BOX_mpod = FOUR_CHAR_INT('m', 'p', 'o', 'd'),//ODTrackReferenceAtomType
549 BOX_nmhd = FOUR_CHAR_INT('n', 'm', 'h', 'd'),//MPEGMediaHeaderAtomType
550 BOX_esds = FOUR_CHAR_INT('e', 's', 'd', 's'),//ESDAtomType
551 BOX_sync = FOUR_CHAR_INT('s', 'y', 'n', 'c'),//OCRReferenceAtomType
552 BOX_ipir = FOUR_CHAR_INT('i', 'p', 'i', 'r'),//IPIReferenceAtomType
553 BOX_mp4s = FOUR_CHAR_INT('m', 'p', '4', 's'),//MPEGSampleEntryAtomType
554 BOX_mp4a = FOUR_CHAR_INT('m', 'p', '4', 'a'),//MPEGAudioSampleEntryAtomType
555 BOX_mp4v = FOUR_CHAR_INT('m', 'p', '4', 'v'),//MPEGVisualSampleEntryAtomType
556
557 // http://www.itscj.ipsj.or.jp/sc29/open/29view/29n7644t.doc
558 BOX_avc1 = FOUR_CHAR_INT('a', 'v', 'c', '1'),
559 BOX_avc2 = FOUR_CHAR_INT('a', 'v', 'c', '2'),
560 BOX_svc1 = FOUR_CHAR_INT('s', 'v', 'c', '1'),
561 BOX_avcC = FOUR_CHAR_INT('a', 'v', 'c', 'C'),
562 BOX_svcC = FOUR_CHAR_INT('s', 'v', 'c', 'C'),
563 BOX_btrt = FOUR_CHAR_INT('b', 't', 'r', 't'),
564 BOX_m4ds = FOUR_CHAR_INT('m', '4', 'd', 's'),
565 BOX_seib = FOUR_CHAR_INT('s', 'e', 'i', 'b'),
566
567 // H264/HEVC
568 BOX_hev1 = FOUR_CHAR_INT('h', 'e', 'v', '1'),
569 BOX_hvc1 = FOUR_CHAR_INT('h', 'v', 'c', '1'),
570 BOX_hvcC = FOUR_CHAR_INT('h', 'v', 'c', 'C'),
571
572 //3GPP atoms
573 BOX_samr = FOUR_CHAR_INT('s', 'a', 'm', 'r'),//AMRSampleEntryAtomType
574 BOX_sawb = FOUR_CHAR_INT('s', 'a', 'w', 'b'),//WB_AMRSampleEntryAtomType
575 BOX_damr = FOUR_CHAR_INT('d', 'a', 'm', 'r'),//AMRConfigAtomType
576 BOX_s263 = FOUR_CHAR_INT('s', '2', '6', '3'),//H263SampleEntryAtomType
577 BOX_d263 = FOUR_CHAR_INT('d', '2', '6', '3'),//H263ConfigAtomType
578
579 //V2 atoms - Movie Fragments
580 BOX_mvex = FOUR_CHAR_INT('m', 'v', 'e', 'x'),//MovieExtendsAtomType
581 BOX_trex = FOUR_CHAR_INT('t', 'r', 'e', 'x'),//TrackExtendsAtomType
582 BOX_moof = FOUR_CHAR_INT('m', 'o', 'o', 'f'),//MovieFragmentAtomType
583 BOX_mfhd = FOUR_CHAR_INT('m', 'f', 'h', 'd'),//MovieFragmentHeaderAtomType
584 BOX_traf = FOUR_CHAR_INT('t', 'r', 'a', 'f'),//TrackFragmentAtomType
585 BOX_tfhd = FOUR_CHAR_INT('t', 'f', 'h', 'd'),//TrackFragmentHeaderAtomType
586 BOX_tfdt = FOUR_CHAR_INT('t', 'f', 'd', 't'),//TrackFragmentBaseMediaDecodeTimeBox
587 BOX_trun = FOUR_CHAR_INT('t', 'r', 'u', 'n'),//TrackFragmentRunAtomType
588 BOX_mehd = FOUR_CHAR_INT('m', 'e', 'h', 'd'),//MovieExtendsHeaderBox
589
590 // Object Descriptors (OD) data coding
591 // These take only 1 byte; this implementation translates <od_tag> to
592 // <od_tag> + OD_BASE to keep API uniform and safe for string functions
593 OD_BASE = FOUR_CHAR_INT('$', '$', '$', '0'),//
594 OD_ESD = FOUR_CHAR_INT('$', '$', '$', '3'),//SDescriptor_Tag
595 OD_DCD = FOUR_CHAR_INT('$', '$', '$', '4'),//DecoderConfigDescriptor_Tag
596 OD_DSI = FOUR_CHAR_INT('$', '$', '$', '5'),//DecoderSpecificInfo_Tag
597 OD_SLC = FOUR_CHAR_INT('$', '$', '$', '6'),//SLConfigDescriptor_Tag
598
599 BOX_meta = FOUR_CHAR_INT('m', 'e', 't', 'a'),
600 BOX_ilst = FOUR_CHAR_INT('i', 'l', 's', 't'),
601
602 // Metadata tags, see http://atomicparsley.sourceforge.net/mpeg-4files.html
603 BOX_calb = FOUR_CHAR_INT('\xa9', 'a', 'l', 'b'), // album
604 BOX_cart = FOUR_CHAR_INT('\xa9', 'a', 'r', 't'), // artist
605 BOX_aART = FOUR_CHAR_INT('a', 'A', 'R', 'T'), // album artist
606 BOX_ccmt = FOUR_CHAR_INT('\xa9', 'c', 'm', 't'), // comment
607 BOX_cday = FOUR_CHAR_INT('\xa9', 'd', 'a', 'y'), // year (as string)
608 BOX_cnam = FOUR_CHAR_INT('\xa9', 'n', 'a', 'm'), // title
609 BOX_cgen = FOUR_CHAR_INT('\xa9', 'g', 'e', 'n'), // custom genre (as string or as byte!)
610 BOX_trkn = FOUR_CHAR_INT('t', 'r', 'k', 'n'), // track number (byte)
611 BOX_disk = FOUR_CHAR_INT('d', 'i', 's', 'k'), // disk number (byte)
612 BOX_cwrt = FOUR_CHAR_INT('\xa9', 'w', 'r', 't'), // composer
613 BOX_ctoo = FOUR_CHAR_INT('\xa9', 't', 'o', 'o'), // encoder
614 BOX_tmpo = FOUR_CHAR_INT('t', 'm', 'p', 'o'), // bpm (byte)
615 BOX_cpil = FOUR_CHAR_INT('c', 'p', 'i', 'l'), // compilation (byte)
616 BOX_covr = FOUR_CHAR_INT('c', 'o', 'v', 'r'), // cover art (JPEG/PNG)
617 BOX_rtng = FOUR_CHAR_INT('r', 't', 'n', 'g'), // rating/advisory (byte)
618 BOX_cgrp = FOUR_CHAR_INT('\xa9', 'g', 'r', 'p'), // grouping
619 BOX_stik = FOUR_CHAR_INT('s', 't', 'i', 'k'), // stik (byte) 0 = Movie 1 = Normal 2 = Audiobook 5 = Whacked Bookmark 6 = Music Video 9 = Short Film 10 = TV Show 11 = Booklet 14 = Ringtone
620 BOX_pcst = FOUR_CHAR_INT('p', 'c', 's', 't'), // podcast (byte)
621 BOX_catg = FOUR_CHAR_INT('c', 'a', 't', 'g'), // category
622 BOX_keyw = FOUR_CHAR_INT('k', 'e', 'y', 'w'), // keyword
623 BOX_purl = FOUR_CHAR_INT('p', 'u', 'r', 'l'), // podcast URL (byte)
624 BOX_egid = FOUR_CHAR_INT('e', 'g', 'i', 'd'), // episode global unique ID (byte)
625 BOX_desc = FOUR_CHAR_INT('d', 'e', 's', 'c'), // description
626 BOX_clyr = FOUR_CHAR_INT('\xa9', 'l', 'y', 'r'), // lyrics (may be > 255 bytes)
627 BOX_tven = FOUR_CHAR_INT('t', 'v', 'e', 'n'), // tv episode number
628 BOX_tves = FOUR_CHAR_INT('t', 'v', 'e', 's'), // tv episode (byte)
629 BOX_tvnn = FOUR_CHAR_INT('t', 'v', 'n', 'n'), // tv network name
630 BOX_tvsh = FOUR_CHAR_INT('t', 'v', 's', 'h'), // tv show name
631 BOX_tvsn = FOUR_CHAR_INT('t', 'v', 's', 'n'), // tv season (byte)
632 BOX_purd = FOUR_CHAR_INT('p', 'u', 'r', 'd'), // purchase date
633 BOX_pgap = FOUR_CHAR_INT('p', 'g', 'a', 'p'), // Gapless Playback (byte)
634
635 //BOX_aart = FOUR_CHAR_INT( 'a', 'a', 'r', 't' ), // Album artist
636 BOX_cART = FOUR_CHAR_INT('\xa9', 'A', 'R', 'T'), // artist
637 BOX_gnre = FOUR_CHAR_INT('g', 'n', 'r', 'e'),
638
639 // 3GPP metatags (http://cpansearch.perl.org/src/JHAR/MP4-Info-1.12/Info.pm)
640 BOX_auth = FOUR_CHAR_INT('a', 'u', 't', 'h'), // author
641 BOX_titl = FOUR_CHAR_INT('t', 'i', 't', 'l'), // title
642 BOX_dscp = FOUR_CHAR_INT('d', 's', 'c', 'p'), // description
643 BOX_perf = FOUR_CHAR_INT('p', 'e', 'r', 'f'), // performer
644 BOX_mean = FOUR_CHAR_INT('m', 'e', 'a', 'n'), //
645 BOX_name = FOUR_CHAR_INT('n', 'a', 'm', 'e'), //
646 BOX_data = FOUR_CHAR_INT('d', 'a', 't', 'a'), //
647
648 // these from http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/2008-September/053151.html
649 BOX_albm = FOUR_CHAR_INT('a', 'l', 'b', 'm'), // album
650 BOX_yrrc = FOUR_CHAR_INT('y', 'r', 'r', 'c') // recording year
651};
652
653// Video track : 'vide'
654#define MP4E_HANDLER_TYPE_VIDE 0x76696465
655// Audio track : 'soun'
656#define MP4E_HANDLER_TYPE_SOUN 0x736F756E
657// General MPEG-4 systems streams (without specific handler).
658// Used for private stream, as suggested in http://www.mp4ra.org/handler.html
659#define MP4E_HANDLER_TYPE_GESM 0x6765736D
660
661typedef struct
662{
663 boxsize_t size;
664 boxsize_t offset;
665 unsigned duration;
666 unsigned flag_random_access;
667} sample_t;
668
/* Growable byte buffer used by the muxer for all of its dynamic storage. */
typedef struct
{
    unsigned char *data;    /* heap buffer, or NULL while nothing is allocated */
    int bytes;              /* number of bytes currently in use */
    int capacity;           /* number of bytes the buffer was sized for */
} minimp4_vector_t;
674
675typedef struct
676{
677 MP4E_track_t info;
678 minimp4_vector_t smpl; // sample descriptor
679 minimp4_vector_t pending_sample;
680
681 minimp4_vector_t vsps; // or dsi for audio
682 minimp4_vector_t vpps; // not used for audio
683 minimp4_vector_t vvps; // used for HEVC
684
685} track_t;
686
687typedef struct MP4E_mux_tag
688{
689 minimp4_vector_t tracks;
690
691 int64_t write_pos;
692 int (*write_callback)(int64_t offset, const void* buffer, size_t size, void* token);
693 void* token;
694 char* text_comment;
695
696 int sequential_mode_flag;
697 int enable_fragmentation; // flag, indicating streaming-friendly 'fragmentation' mode
698 int fragments_count; // # of fragments in 'fragmentation' mode
699
700} MP4E_mux_t;
701
/*
 * Fixed 24-byte 'ftyp' box written at the very start of every output file.
 *
 * Alternative 32-byte layout, as written by ffmpeg (kept for reference, not used):
 *   0,0,0,0x20,'f','t','y','p',
 *   'i','s','o','m',0,0,2,0,
 *   'm','p','4','1','i','s','o','m',
 *   'i','s','o','2','a','v','c','1'
 */
static const unsigned char box_ftyp[] = {
    0x00, 0x00, 0x00, 0x18,   'f', 't', 'y', 'p',     /* box size (24 bytes), box type   */
    'm',  'p',  '4',  '2',    0x00, 0x00, 0x00, 0x00, /* brand 'mp42', four zero bytes   */
    'm',  'p',  '4',  '2',    'i', 's', 'o', 'm'      /* brands 'mp42' and 'isom'        */
};
715
719#define WR(x, n) *p++ = (unsigned char)((x) >> 8*n)
720#define WRITE_1(x) WR(x, 0);
721#define WRITE_2(x) WR(x, 1); WR(x, 0);
722#define WRITE_3(x) WR(x, 2); WR(x, 1); WR(x, 0);
723#define WRITE_4(x) WR(x, 3); WR(x, 2); WR(x, 1); WR(x, 0);
724#define WR4(p, x) (p)[0] = (char)((x) >> 8*3); (p)[1] = (char)((x) >> 8*2); (p)[2] = (char)((x) >> 8*1); (p)[3] = (char)((x));
725
726// Finish atom: update atom size field
727#define END_ATOM --stack; WR4((unsigned char*)*stack, p - *stack);
728
729// Initiate atom: save position of size field on stack
730#define ATOM(x) *stack++ = p; p += 4; WRITE_4(x);
731
732// Atom with 'FullAtomVersionFlags' field
733#define ATOM_FULL(x, flag) ATOM(x); WRITE_4(flag);
734
735#define ERR(func) { int err = func; if (err) return err; }
736
740static int minimp4_vector_init(minimp4_vector_t* h, int capacity)
741{
742 h->bytes = 0;
743 h->capacity = capacity;
744 h->data = capacity ? (unsigned char*)malloc(capacity) : NULL;
745 return !capacity || !!h->data;
746}
747
751static void minimp4_vector_reset(minimp4_vector_t* h)
752{
753 if (h->data)
754 free(h->data);
755 memset(h, 0, sizeof(minimp4_vector_t));
756}
757
761static int minimp4_vector_grow(minimp4_vector_t* h, int bytes)
762{
763 void* p;
764 int new_size = h->capacity * 2 + 1024;
765 if (new_size < h->capacity + bytes)
766 new_size = h->capacity + bytes + 1024;
767 p = realloc(h->data, new_size);
768 if (!p)
769 return 0;
770 h->data = (unsigned char*)p;
771 h->capacity = new_size;
772 return 1;
773}
774
780static unsigned char* minimp4_vector_alloc_tail(minimp4_vector_t* h, int bytes)
781{
782 unsigned char* p;
783 if (!h->data && !minimp4_vector_init(h, 2 * bytes + 1024))
784 return NULL;
785 if ((h->capacity - h->bytes) < bytes && !minimp4_vector_grow(h, bytes))
786 return NULL;
787 assert(h->data);
788 assert((h->capacity - h->bytes) >= bytes);
789 p = h->data + h->bytes;
790 h->bytes += bytes;
791 return p;
792}
793
797static unsigned char* minimp4_vector_put(minimp4_vector_t* h, const void* buf, int bytes)
798{
799 unsigned char* tail = minimp4_vector_alloc_tail(h, bytes);
800 if (tail)
801 memcpy(tail,(void*) buf, bytes);
802 return tail;
803}
804
809MP4E_mux_t* MP4E_open(int sequential_mode_flag, int enable_fragmentation, void* token,
810 int (*write_callback)(int64_t offset, const void* buffer, size_t size, void* token))
811{
812 if (write_callback(0, box_ftyp, sizeof(box_ftyp), token)) // Write fixed header: 'ftyp' box
813 return 0;
814 MP4E_mux_t* mux = (MP4E_mux_t*)malloc(sizeof(MP4E_mux_t));
815 if (!mux)
816 return mux;
817 mux->sequential_mode_flag = sequential_mode_flag || enable_fragmentation;
818 mux->enable_fragmentation = enable_fragmentation;
819 mux->fragments_count = 0;
820 mux->write_callback = write_callback;
821 mux->token = token;
822 mux->text_comment = NULL;
823 mux->write_pos = sizeof(box_ftyp);
824
825 if (!mux->sequential_mode_flag)
826 { // Write filler, which would be updated later
827 if (mux->write_callback(mux->write_pos, box_ftyp, 8, mux->token))
828 {
829 free(mux);
830 return 0;
831 }
832 mux->write_pos += 16; // box_ftyp + box_free for 32bit or 64bit size encoding
833 }
834 minimp4_vector_init(&mux->tracks, 2 * sizeof(track_t));
835 return mux;
836}
837
841int MP4E_add_track(MP4E_mux_t* mux, const MP4E_track_t* track_data)
842{
843 track_t* tr;
844 int ntr = mux->tracks.bytes / sizeof(track_t);
845
846 if (!mux || !track_data)
848
849 tr = (track_t*)minimp4_vector_alloc_tail(&mux->tracks, sizeof(track_t));
850 if (!tr)
852 memset(tr, 0, sizeof(track_t));
853 memcpy(&tr->info, (void*)track_data, sizeof(*track_data));
854 if (!minimp4_vector_init(&tr->smpl, 256))
856 minimp4_vector_init(&tr->vsps, 0);
857 minimp4_vector_init(&tr->vpps, 0);
858 minimp4_vector_init(&tr->pending_sample, 0);
859 return ntr;
860}
861
/**
 * Read the 16-bit big-endian length prefix of the item at `p`.
 *
 * @param p      current position inside the buffer
 * @param end    one past the last valid byte of the buffer
 * @param bytes  receives the decoded length; left untouched on failure
 * @return pointer to the first byte after the length prefix, or NULL when
 *         fewer than two bytes remain between `p` and `end`
 */
static const unsigned char *next_dsi(const unsigned char *p, const unsigned char *end, int *bytes)
{
    // Both prefix bytes must lie inside [p, end).
    if (end - p < 2)
        return NULL;

    *bytes = p[0] * 256 + p[1];
    return p + 2;
}
872
873static int append_mem(minimp4_vector_t* v, const void* mem, int bytes)
874{
875 int i;
876 unsigned char size[2];
877 const unsigned char* p = v->data;
878 for (i = 0; i + 2 < v->bytes;)
879 {
880 int cb = p[i] * 256 + p[i + 1];
881 if (cb == bytes && !memcmp(p + i + 2, mem, cb))
882 return 1;
883 i += 2 + cb;
884 }
885 size[0] = bytes >> 8;
886 size[1] = bytes;
887 return minimp4_vector_put(v, size, 2) && minimp4_vector_put(v, mem, bytes);
888}
889
890static int items_count(minimp4_vector_t* v)
891{
892 int i, count = 0;
893 const unsigned char* p = v->data;
894 for (i = 0; i + 2 < v->bytes;)
895 {
896 int cb = p[i] * 256 + p[i + 1];
897 count++;
898 i += 2 + cb;
899 }
900 return count;
901}
902
903int MP4E_set_dsi(MP4E_mux_t* mux, int track_id, const void* dsi, int bytes)
904{
905 track_t* tr = ((track_t*)mux->tracks.data) + track_id;
906 assert(tr->info.track_media_kind == e_audio ||
907 tr->info.track_media_kind == e_private);
908 if (tr->vsps.bytes)
909 return MP4E_STATUS_ONLY_ONE_DSI_ALLOWED; // only one DSI allowed
910 return append_mem(&tr->vsps, dsi, bytes) ? MP4E_STATUS_OK : MP4E_STATUS_NO_MEMORY;
911}
912
913int MP4E_set_vps(MP4E_mux_t* mux, int track_id, const void* vps, int bytes)
914{
915 track_t* tr = ((track_t*)mux->tracks.data) + track_id;
916 assert(tr->info.track_media_kind == e_video);
917 return append_mem(&tr->vvps, vps, bytes) ? MP4E_STATUS_OK : MP4E_STATUS_NO_MEMORY;
918}
919
920int MP4E_set_sps(MP4E_mux_t* mux, int track_id, const void* sps, int bytes)
921{
922 track_t* tr = ((track_t*)mux->tracks.data) + track_id;
923 assert(tr->info.track_media_kind == e_video);
924 return append_mem(&tr->vsps, sps, bytes) ? MP4E_STATUS_OK : MP4E_STATUS_NO_MEMORY;
925}
926
927int MP4E_set_pps(MP4E_mux_t* mux, int track_id, const void* pps, int bytes)
928{
929 track_t* tr = ((track_t*)mux->tracks.data) + track_id;
930 assert(tr->info.track_media_kind == e_video);
931 return append_mem(&tr->vpps, pps, bytes) ? MP4E_STATUS_OK : MP4E_STATUS_NO_MEMORY;
932}
933
934static unsigned get_duration(const track_t* tr)
935{
936 unsigned i, sum_duration = 0;
937 const sample_t* s = (const sample_t*)tr->smpl.data;
938 for (i = 0; i < tr->smpl.bytes / sizeof(sample_t); i++)
939 {
940 sum_duration += s[i].duration;
941 }
942 return sum_duration;
943}
944
945static int write_pending_data(MP4E_mux_t* mux, track_t* tr)
946{
947 // if have pending sample && have at least one sample in the index
948 if (tr->pending_sample.bytes > 0 && tr->smpl.bytes >= sizeof(sample_t))
949 {
950 // Complete pending sample
951 sample_t* smpl_desc;
952 unsigned char base[8], * p = base;
953
954 assert(mux->sequential_mode_flag);
955
956 // Write each sample to a separate atom
957 assert(mux->sequential_mode_flag); // Separate atom needed for sequential_mode only
958 WRITE_4(tr->pending_sample.bytes + 8);
959 WRITE_4(BOX_mdat);
960 ERR(mux->write_callback(mux->write_pos, base, p - base, mux->token));
961 mux->write_pos += p - base;
962
963 // Update sample descriptor with size and offset
964 smpl_desc = ((sample_t*)minimp4_vector_alloc_tail(&tr->smpl, 0)) - 1;
965 smpl_desc->size = tr->pending_sample.bytes;
966 smpl_desc->offset = (boxsize_t)mux->write_pos;
967
968 // Write data
969 ERR(mux->write_callback(mux->write_pos, tr->pending_sample.data, tr->pending_sample.bytes, mux->token));
970 mux->write_pos += tr->pending_sample.bytes;
971
972 // reset buffer
973 tr->pending_sample.bytes = 0;
974 }
975 return MP4E_STATUS_OK;
976}
977
978static int add_sample_descriptor(MP4E_mux_t* mux, track_t* tr, int data_bytes, int duration, int kind)
979{
980 sample_t smp;
981 smp.size = data_bytes;
982 smp.offset = (boxsize_t)mux->write_pos;
983 smp.duration = (duration ? duration : tr->info.default_duration);
984 smp.flag_random_access = (kind == MP4E_SAMPLE_RANDOM_ACCESS);
985 return NULL != minimp4_vector_put(&tr->smpl, &smp, sizeof(sample_t));
986}
987
988static int mp4e_flush_index(MP4E_mux_t* mux);
989
993static int mp4e_write_fragment_header(MP4E_mux_t* mux, int track_num, int data_bytes, int duration, int kind
995 , uint64_t timestamp
996#endif
997)
998{
999 unsigned char base[888], * p = base;
1000 unsigned char* stack_base[20]; // atoms nesting stack
1001 unsigned char** stack = stack_base;
1002 unsigned char* pdata_offset;
1003 unsigned flags;
1004 enum
1005 {
1006 default_sample_duration_present = 0x000008,
1007 default_sample_flags_present = 0x000020,
1008 } e;
1009
1010 track_t* tr = ((track_t*)mux->tracks.data) + track_num;
1011
1012 ATOM(BOX_moof)
1013 ATOM_FULL(BOX_mfhd, 0)
1014 WRITE_4(mux->fragments_count); // start from 1
1015 END_ATOM
1016 ATOM(BOX_traf)
1017 flags = 0;
1018 if (tr->info.track_media_kind == e_video)
1019 flags |= 0x20; // default-sample-flags-present
1020 else
1021 flags |= 0x08; // default-sample-duration-present
1022 flags = (tr->info.track_media_kind == e_video) ? 0x20020 : 0x20008;
1023
1024 ATOM_FULL(BOX_tfhd, flags)
1025 WRITE_4(track_num + 1); // track_ID
1026 if (tr->info.track_media_kind == e_video)
1027 {
1028 WRITE_4(0x1010000); // default_sample_flags
1029 }
1030 else
1031 {
1032 WRITE_4(duration);
1033 }
1034 END_ATOM
1035#if MP4D_TFDT_SUPPORT
1036 ATOM_FULL(BOX_tfdt, 0x01000000) // version 1
1037 WRITE_4(timestamp >> 32); // upper timestamp
1038 WRITE_4(timestamp & 0xffffffff); // lower timestamp
1039 END_ATOM
1040#endif
1041 if (tr->info.track_media_kind == e_audio)
1042 {
1043 flags = 0;
1044 flags |= 0x001; // data-offset-present
1045 flags |= 0x200; // sample-size-present
1046 ATOM_FULL(BOX_trun, flags)
1047 WRITE_4(1); // sample_count
1048 pdata_offset = p; p += 4; // save ptr to data_offset
1049 WRITE_4(data_bytes);// sample_size
1050 END_ATOM
1051 }
1052 else if (kind == MP4E_SAMPLE_RANDOM_ACCESS)
1053 {
1054 flags = 0;
1055 flags |= 0x001; // data-offset-present
1056 flags |= 0x004; // first-sample-flags-present
1057 flags |= 0x100; // sample-duration-present
1058 flags |= 0x200; // sample-size-present
1059 ATOM_FULL(BOX_trun, flags)
1060 WRITE_4(1); // sample_count
1061 pdata_offset = p; p += 4; // save ptr to data_offset
1062 WRITE_4(0x2000000); // first_sample_flags
1063 WRITE_4(duration); // sample_duration
1064 WRITE_4(data_bytes);// sample_size
1065 END_ATOM
1066 }
1067 else
1068 {
1069 flags = 0;
1070 flags |= 0x001; // data-offset-present
1071 flags |= 0x100; // sample-duration-present
1072 flags |= 0x200; // sample-size-present
1073 ATOM_FULL(BOX_trun, flags)
1074 WRITE_4(1); // sample_count
1075 pdata_offset = p; p += 4; // save ptr to data_offset
1076 WRITE_4(duration); // sample_duration
1077 WRITE_4(data_bytes);// sample_size
1078 END_ATOM
1079 }
1080 END_ATOM
1081 END_ATOM
1082 WR4(pdata_offset, (p - base) + 8);
1083
1084 ERR(mux->write_callback(mux->write_pos, base, p - base, mux->token));
1085 mux->write_pos += p - base;
1086 return MP4E_STATUS_OK;
1087}
1088
1089static int mp4e_write_mdat_box(MP4E_mux_t* mux, uint32_t size)
1090{
1091 unsigned char base[8], * p = base;
1092 WRITE_4(size);
1093 WRITE_4(BOX_mdat);
1094 ERR(mux->write_callback(mux->write_pos, base, p - base, mux->token));
1095 mux->write_pos += p - base;
1096 return MP4E_STATUS_OK;
1097}
1098
1102int MP4E_put_sample(MP4E_mux_t* mux, int track_num, const void* data, int data_bytes, int duration, int kind)
1103{
1104 track_t* tr;
1105 if (!mux || !data)
1107 tr = ((track_t*)mux->tracks.data) + track_num;
1108
1109 if (mux->enable_fragmentation)
1110 {
1111#if MP4D_TFDT_SUPPORT
1112 // NOTE: assume a constant `duration` to calculate current timestamp
1113 uint64_t timestamp = (uint64_t)mux->fragments_count * duration;
1114#endif
1115 if (!mux->fragments_count++)
1116 ERR(mp4e_flush_index(mux)); // write file headers before 1st sample
1117 // write MOOF + MDAT + sample data
1118#if MP4D_TFDT_SUPPORT
1119 ERR(mp4e_write_fragment_header(mux, track_num, data_bytes, duration, kind, timestamp));
1120#else
1121 ERR(mp4e_write_fragment_header(mux, track_num, data_bytes, duration, kind));
1122#endif
1123 // write MDAT box for each sample
1124 ERR(mp4e_write_mdat_box(mux, data_bytes + 8));
1125 ERR(mux->write_callback(mux->write_pos, data, data_bytes, mux->token));
1126 mux->write_pos += data_bytes;
1127 return MP4E_STATUS_OK;
1128 }
1129
1130 if (kind != MP4E_SAMPLE_CONTINUATION)
1131 {
1132 if (mux->sequential_mode_flag)
1133 ERR(write_pending_data(mux, tr));
1134 if (!add_sample_descriptor(mux, tr, data_bytes, duration, kind))
1135 return MP4E_STATUS_NO_MEMORY;
1136 }
1137 else
1138 {
1139 if (!mux->sequential_mode_flag)
1140 {
1141 sample_t* smpl_desc;
1142 if (tr->smpl.bytes < sizeof(sample_t))
1143 return MP4E_STATUS_NO_MEMORY; // write continuation, but there are no samples in the index
1144 // Accumulate size of the continuation in the sample descriptor
1145 smpl_desc = (sample_t*)(tr->smpl.data + tr->smpl.bytes) - 1;
1146 smpl_desc->size += data_bytes;
1147 }
1148 }
1149
1150 if (mux->sequential_mode_flag)
1151 {
1152 if (!minimp4_vector_put(&tr->pending_sample, data, data_bytes))
1153 return MP4E_STATUS_NO_MEMORY;
1154 }
1155 else
1156 {
1157 ERR(mux->write_callback(mux->write_pos, data, data_bytes, mux->token));
1158 mux->write_pos += data_bytes;
1159 }
1160 return MP4E_STATUS_OK;
1161}
1162
/**
 * Number of bytes the length field written by WRITE_OD_LEN occupies for a
 * descriptor payload of `size` bytes: one byte, plus one more byte for every
 * step of 0x7F needed to bring the size down to 0x7F or less.
 */
static int od_size_of_size(int size)
{
    if (size <= 0x7F)
        return 1;
    // Each extra byte absorbs 0x7F; (size - 1) / 0x7F counts those steps
    return 1 + (size - 1) / 0x7F;
}
1173
1180int MP4E_set_text_comment(MP4E_mux_t* mux, const char* comment)
1181{
1182 if (!mux || !comment)
1184 if (mux->text_comment)
1185 free(mux->text_comment);
1186 mux->text_comment = strdup(comment);
1187 if (!mux->text_comment)
1188 return MP4E_STATUS_NO_MEMORY;
1189 return MP4E_STATUS_OK;
1190}
1191
1195static int mp4e_flush_index(MP4E_mux_t* mux)
1196{
1197 unsigned char* stack_base[20]; // atoms nesting stack
1198 unsigned char** stack = stack_base;
1199 unsigned char* base, * p;
1200 unsigned int ntr, index_bytes, ntracks = mux->tracks.bytes / sizeof(track_t);
1201 int i, err;
1202
1203 // How much memory needed for indexes
1204 // Experimental data:
1205 // file with 1 track = 560 bytes
1206 // file with 2 tracks = 972 bytes
1207 // track size = 412 bytes;
1208 // file header size = 148 bytes
1209#define FILE_HEADER_BYTES 256
1210#define TRACK_HEADER_BYTES 512
1211 index_bytes = FILE_HEADER_BYTES;
1212 if (mux->text_comment)
1213 index_bytes += 128 + strlen(mux->text_comment);
1214 for (ntr = 0; ntr < ntracks; ntr++)
1215 {
1216 track_t* tr = ((track_t*)mux->tracks.data) + ntr;
1217 index_bytes += TRACK_HEADER_BYTES; // fixed amount (implementation-dependent)
1218 // may need extra 4 bytes for duration field + 4 bytes for worst-case random access box
1219 index_bytes += tr->smpl.bytes * (sizeof(sample_t) + 4 + 4) / sizeof(sample_t);
1220 index_bytes += tr->vsps.bytes;
1221 index_bytes += tr->vpps.bytes;
1222
1223 ERR(write_pending_data(mux, tr));
1224 }
1225
1226 base = (unsigned char*)malloc(index_bytes);
1227 if (!base)
1228 return MP4E_STATUS_NO_MEMORY;
1229 p = base;
1230
1231 if (!mux->sequential_mode_flag)
1232 {
1233 // update size of mdat box.
1234 // One of 2 points, which requires random file access.
1235 // Second is optional duration update at beginning of file in fragmentation mode.
1236 // This can be avoided using "till eof" size code, but in this case indexes must be
1237 // written before the mdat....
1238 int64_t size = mux->write_pos - sizeof(box_ftyp);
1239 const int64_t size_limit = (int64_t)(uint64_t)0xfffffffe;
1240 if (size > size_limit)
1241 {
1242 WRITE_4(1);
1243 WRITE_4(BOX_mdat);
1244 WRITE_4((size >> 32) & 0xffffffff);
1245 WRITE_4(size & 0xffffffff);
1246 }
1247 else
1248 {
1249 WRITE_4(8);
1250 WRITE_4(BOX_free);
1251 WRITE_4(size - 8);
1252 WRITE_4(BOX_mdat);
1253 }
1254 ERR(mux->write_callback(sizeof(box_ftyp), base, p - base, mux->token));
1255 p = base;
1256 }
1257
1258 // Write index atoms; order taken from Table 1 of [1]
1259#define MOOV_TIMESCALE 1000
1260 ATOM(BOX_moov);
1261 ATOM_FULL(BOX_mvhd, 0);
1262 WRITE_4(0); // creation_time
1263 WRITE_4(0); // modification_time
1264
1265 if (ntracks)
1266 {
1267 track_t* tr = ((track_t*)mux->tracks.data) + 0; // take 1st track
1268 unsigned duration = get_duration(tr);
1269 duration = (unsigned)(duration * 1LL * MOOV_TIMESCALE / tr->info.time_scale);
1270 WRITE_4(MOOV_TIMESCALE); // duration
1271 WRITE_4(duration); // duration
1272 }
1273
1274 WRITE_4(0x00010000); // rate
1275 WRITE_2(0x0100); // volume
1276 WRITE_2(0); // reserved
1277 WRITE_4(0); // reserved
1278 WRITE_4(0); // reserved
1279
1280 // matrix[9]
1281 WRITE_4(0x00010000); WRITE_4(0); WRITE_4(0);
1282 WRITE_4(0); WRITE_4(0x00010000); WRITE_4(0);
1283 WRITE_4(0); WRITE_4(0); WRITE_4(0x40000000);
1284
1285 // pre_defined[6]
1286 WRITE_4(0); WRITE_4(0); WRITE_4(0);
1287 WRITE_4(0); WRITE_4(0); WRITE_4(0);
1288
1289 //next_track_ID is a non-zero integer that indicates a value to use for the track ID of the next track to be
1290 //added to this presentation. Zero is not a valid track ID value. The value of next_track_ID shall be
1291 //larger than the largest track-ID in use.
1292 WRITE_4(ntracks + 1);
1293 END_ATOM;
1294
1295 for (ntr = 0; ntr < ntracks; ntr++)
1296 {
1297 track_t* tr = ((track_t*)mux->tracks.data) + ntr;
1298 unsigned duration = get_duration(tr);
1299 int samples_count = tr->smpl.bytes / sizeof(sample_t);
1300 const sample_t* sample = (const sample_t*)tr->smpl.data;
1301 unsigned handler_type;
1302 const char* handler_ascii = NULL;
1303
1304 if (mux->enable_fragmentation)
1305 samples_count = 0;
1306 else if (samples_count <= 0)
1307 continue; // skip empty track
1308
1309 switch (tr->info.track_media_kind)
1310 {
1311 case e_audio:
1312 handler_type = MP4E_HANDLER_TYPE_SOUN;
1313 handler_ascii = "SoundHandler";
1314 break;
1315 case e_video:
1316 handler_type = MP4E_HANDLER_TYPE_VIDE;
1317 handler_ascii = "VideoHandler";
1318 break;
1319 case e_private:
1320 handler_type = MP4E_HANDLER_TYPE_GESM;
1321 break;
1322 default:
1324 }
1325
1326 ATOM(BOX_trak);
1327 ATOM_FULL(BOX_tkhd, 7); // flag: 1=trak enabled; 2=track in movie; 4=track in preview
1328 WRITE_4(0); // creation_time
1329 WRITE_4(0); // modification_time
1330 WRITE_4(ntr + 1); // track_ID
1331 WRITE_4(0); // reserved
1332 WRITE_4((unsigned)(duration * 1LL * MOOV_TIMESCALE / tr->info.time_scale));
1333 WRITE_4(0); WRITE_4(0); // reserved[2]
1334 WRITE_2(0); // layer
1335 WRITE_2(0); // alternate_group
1336 WRITE_2(0x0100); // volume {if track_is_audio 0x0100 else 0};
1337 WRITE_2(0); // reserved
1338
1339 // matrix[9]
1340 WRITE_4(0x00010000); WRITE_4(0); WRITE_4(0);
1341 WRITE_4(0); WRITE_4(0x00010000); WRITE_4(0);
1342 WRITE_4(0); WRITE_4(0); WRITE_4(0x40000000);
1343
1344 if (tr->info.track_media_kind == e_audio || tr->info.track_media_kind == e_private)
1345 {
1346 WRITE_4(0); // width
1347 WRITE_4(0); // height
1348 }
1349 else
1350 {
1351 WRITE_4(tr->info.u.v.width * 0x10000); // width
1352 WRITE_4(tr->info.u.v.height * 0x10000); // height
1353 }
1354 END_ATOM;
1355
1356 ATOM(BOX_mdia);
1357 ATOM_FULL(BOX_mdhd, 0);
1358 WRITE_4(0); // creation_time
1359 WRITE_4(0); // modification_time
1360 WRITE_4(tr->info.time_scale);
1361 WRITE_4(duration); // duration
1362 {
1363 int lang_code = ((tr->info.language[0] & 31) << 10) | ((tr->info.language[1] & 31) << 5) | (tr->info.language[2] & 31);
1364 WRITE_2(lang_code); // language
1365 }
1366 WRITE_2(0); // pre_defined
1367 END_ATOM;
1368
1369 ATOM_FULL(BOX_hdlr, 0);
1370 WRITE_4(0); // pre_defined
1371 WRITE_4(handler_type); // handler_type
1372 WRITE_4(0); WRITE_4(0); WRITE_4(0); // reserved[3]
1373 // name is a null-terminated string in UTF-8 characters which gives a human-readable name for the track type (for debugging and inspection purposes).
1374 // set mdia hdlr name field to what quicktime uses.
1375 // Sony smartphone may fail to decode short files w/o handler name
1376 if (handler_ascii)
1377 {
1378 for (i = 0; i < (int)strlen(handler_ascii) + 1; i++)
1379 {
1380 WRITE_1(handler_ascii[i]);
1381 }
1382 }
1383 else
1384 {
1385 WRITE_4(0);
1386 }
1387 END_ATOM;
1388
1389 ATOM(BOX_minf);
1390
1391 if (tr->info.track_media_kind == e_audio)
1392 {
1393 // Sound Media Header Box
1394 ATOM_FULL(BOX_smhd, 0);
1395 WRITE_2(0); // balance
1396 WRITE_2(0); // reserved
1397 END_ATOM;
1398 }
1399 if (tr->info.track_media_kind == e_video)
1400 {
1401 // mandatory Video Media Header Box
1402 ATOM_FULL(BOX_vmhd, 1);
1403 WRITE_2(0); // graphicsmode
1404 WRITE_2(0); WRITE_2(0); WRITE_2(0); // opcolor[3]
1405 END_ATOM;
1406 }
1407
1408 ATOM(BOX_dinf);
1409 ATOM_FULL(BOX_dref, 0);
1410 WRITE_4(1); // entry_count
1411 // If the flag is set indicating that the data is in the same file as this box, then no string (not even an empty one)
1412 // shall be supplied in the entry field.
1413
1414 // ASP the correct way to avoid supply the string, is to use flag 1
1415 // otherwise ISO reference demux crashes
1416 ATOM_FULL(BOX_url, 1);
1417 END_ATOM;
1418 END_ATOM;
1419 END_ATOM;
1420
1421 ATOM(BOX_stbl);
1422 ATOM_FULL(BOX_stsd, 0);
1423 WRITE_4(1); // entry_count;
1424
1425 if (tr->info.track_media_kind == e_audio || tr->info.track_media_kind == e_private)
1426 {
1427 // AudioSampleEntry() assume MP4E_HANDLER_TYPE_SOUN
1428 if (tr->info.track_media_kind == e_audio)
1429 {
1430 ATOM(BOX_mp4a);
1431 }
1432 else
1433 {
1434 ATOM(BOX_mp4s);
1435 }
1436
1437 // SampleEntry
1438 WRITE_4(0); WRITE_2(0); // reserved[6]
1439 WRITE_2(1); // data_reference_index; - this is a tag for descriptor below
1440
1441 if (tr->info.track_media_kind == e_audio)
1442 {
1443 // AudioSampleEntry
1444 WRITE_4(0); WRITE_4(0); // reserved[2]
1445 WRITE_2(tr->info.u.a.channelcount); // channelcount
1446 WRITE_2(16); // samplesize
1447 WRITE_4(0); // pre_defined+reserved
1448 WRITE_4((tr->info.time_scale << 16)); // samplerate == = {timescale of media}<<16;
1449 }
1450
1451 ATOM_FULL(BOX_esds, 0);
1452 if (tr->vsps.bytes > 0)
1453 {
1454 int dsi_bytes = tr->vsps.bytes - 2; // - two bytes size field
1455 int dsi_size_size = od_size_of_size(dsi_bytes);
1456 int dcd_bytes = dsi_bytes + dsi_size_size + 1 + (1 + 1 + 3 + 4 + 4);
1457 int dcd_size_size = od_size_of_size(dcd_bytes);
1458 int esd_bytes = dcd_bytes + dcd_size_size + 1 + 3;
1459
1460#define WRITE_OD_LEN(size) if (size > 0x7F) do { size -= 0x7F; WRITE_1(0x00ff); } while (size > 0x7F); WRITE_1(size)
1461 WRITE_1(3); // OD_ESD
1462 WRITE_OD_LEN(esd_bytes);
1463 WRITE_2(0); // ES_ID(2) // TODO - what is this?
1464 WRITE_1(0); // flags(1)
1465
1466 WRITE_1(4); // OD_DCD
1467 WRITE_OD_LEN(dcd_bytes);
1468 if (tr->info.track_media_kind == e_audio)
1469 {
1470 WRITE_1(MP4_OBJECT_TYPE_AUDIO_ISO_IEC_14496_3); // OD_DCD
1471 WRITE_1(5 << 2); // stream_type == AudioStream
1472 }
1473 else
1474 {
1475 // http://xhelmboyx.tripod.com/formats/mp4-layout.txt
1476 WRITE_1(208); // 208 = private video
1477 WRITE_1(32 << 2); // stream_type == user private
1478 }
1479 WRITE_3(tr->info.u.a.channelcount * 6144 / 8); // bufferSizeDB in bytes, constant as in reference decoder
1480 WRITE_4(0); // maxBitrate TODO
1481 WRITE_4(0); // avg_bitrate_bps TODO
1482
1483 WRITE_1(5); // OD_DSI
1484 WRITE_OD_LEN(dsi_bytes);
1485 for (i = 0; i < dsi_bytes; i++)
1486 {
1487 WRITE_1(tr->vsps.data[2 + i]);
1488 }
1489 }
1490 END_ATOM;
1491 END_ATOM;
1492 }
1493
1494 if (tr->info.track_media_kind == e_video && (MP4_OBJECT_TYPE_AVC == tr->info.object_type_indication || MP4_OBJECT_TYPE_HEVC == tr->info.object_type_indication))
1495 {
1496 int numOfSequenceParameterSets = items_count(&tr->vsps);
1497 int numOfPictureParameterSets = items_count(&tr->vpps);
1498 if (MP4_OBJECT_TYPE_AVC == tr->info.object_type_indication)
1499 {
1500 ATOM(BOX_avc1);
1501 }
1502 else
1503 {
1504 ATOM(BOX_hvc1);
1505 }
1506 // VisualSampleEntry 8.16.2
1507 // extends SampleEntry
1508 WRITE_2(0); // reserved
1509 WRITE_2(0); // reserved
1510 WRITE_2(0); // reserved
1511 WRITE_2(1); // data_reference_index
1512
1513 WRITE_2(0); // pre_defined
1514 WRITE_2(0); // reserved
1515 WRITE_4(0); // pre_defined
1516 WRITE_4(0); // pre_defined
1517 WRITE_4(0); // pre_defined
1518 WRITE_2(tr->info.u.v.width);
1519 WRITE_2(tr->info.u.v.height);
1520 WRITE_4(0x00480000); // horizresolution = 72 dpi
1521 WRITE_4(0x00480000); // vertresolution = 72 dpi
1522 WRITE_4(0); // reserved
1523 WRITE_2(1); // frame_count
1524 for (i = 0; i < 32; i++)
1525 {
1526 WRITE_1(0); // compressorname
1527 }
1528 WRITE_2(24); // depth
1529 WRITE_2(-1); // pre_defined
1530
1531 if (MP4_OBJECT_TYPE_AVC == tr->info.object_type_indication)
1532 {
1533 ATOM(BOX_avcC);
1534 // AVCDecoderConfigurationRecord 5.2.4.1.1
1535 WRITE_1(1); // configurationVersion
1536 WRITE_1(tr->vsps.data[2 + 1]);
1537 WRITE_1(tr->vsps.data[2 + 2]);
1538 WRITE_1(tr->vsps.data[2 + 3]);
1539 WRITE_1(255); // 0xfc + NALU_len - 1
1540 WRITE_1(0xe0 | numOfSequenceParameterSets);
1541 for (i = 0; i < tr->vsps.bytes; i++)
1542 {
1543 WRITE_1(tr->vsps.data[i]);
1544 }
1545 WRITE_1(numOfPictureParameterSets);
1546 for (i = 0; i < tr->vpps.bytes; i++)
1547 {
1548 WRITE_1(tr->vpps.data[i]);
1549 }
1550 }
1551 else
1552 {
1553 int numOfVPS = items_count(&tr->vpps);
1554 ATOM(BOX_hvcC);
1555 // TODO: read actual params from stream
1556 WRITE_1(1); // configurationVersion
1557 WRITE_1(1); // Profile Space (2), Tier (1), Profile (5)
1558 WRITE_4(0x60000000); // Profile Compatibility
1559 WRITE_2(0); // progressive, interlaced, non packed constraint, frame only constraint flags
1560 WRITE_4(0); // constraint indicator flags
1561 WRITE_1(0); // level_idc
1562 WRITE_2(0xf000); // Min Spatial Segmentation
1563 WRITE_1(0xfc); // Parallelism Type
1564 WRITE_1(0xfc); // Chroma Format
1565 WRITE_1(0xf8); // Luma Depth
1566 WRITE_1(0xf8); // Chroma Depth
1567 WRITE_2(0); // Avg Frame Rate
1568 WRITE_1(3); // ConstantFrameRate (2), NumTemporalLayers (3), TemporalIdNested (1), LengthSizeMinusOne (2)
1569
1570 WRITE_1(3); // Num Of Arrays
1571 WRITE_1((1 << 7) | (HEVC_NAL_VPS & 0x3f)); // Array Completeness + NAL Unit Type
1572 WRITE_2(numOfVPS);
1573 for (i = 0; i < tr->vvps.bytes; i++)
1574 {
1575 WRITE_1(tr->vvps.data[i]);
1576 }
1577 WRITE_1((1 << 7) | (HEVC_NAL_SPS & 0x3f));
1578 WRITE_2(numOfSequenceParameterSets);
1579 for (i = 0; i < tr->vsps.bytes; i++)
1580 {
1581 WRITE_1(tr->vsps.data[i]);
1582 }
1583 WRITE_1((1 << 7) | (HEVC_NAL_PPS & 0x3f));
1584 WRITE_2(numOfPictureParameterSets);
1585 for (i = 0; i < tr->vpps.bytes; i++)
1586 {
1587 WRITE_1(tr->vpps.data[i]);
1588 }
1589 }
1590
1591 END_ATOM;
1592 END_ATOM;
1593 }
1594 END_ATOM;
1595
1596 /************************************************************************/
1597 /* indexes */
1598 /************************************************************************/
1599
1600 // Time to Sample Box
1601 ATOM_FULL(BOX_stts, 0);
1602 {
1603 unsigned char* pentry_count = p;
1604 int cnt = 1, entry_count = 0;
1605 WRITE_4(0);
1606 for (i = 0; i < samples_count; i++, cnt++)
1607 {
1608 if (i == (samples_count - 1) || sample[i].duration != sample[i + 1].duration)
1609 {
1610 WRITE_4(cnt);
1611 WRITE_4(sample[i].duration);
1612 cnt = 0;
1613 entry_count++;
1614 }
1615 }
1616 WR4(pentry_count, entry_count);
1617 }
1618 END_ATOM;
1619
1620 // Sample To Chunk Box
1621 ATOM_FULL(BOX_stsc, 0);
1622 if (mux->enable_fragmentation)
1623 {
1624 WRITE_4(0); // entry_count
1625 }
1626 else
1627 {
1628 WRITE_4(1); // entry_count
1629 WRITE_4(1); // first_chunk;
1630 WRITE_4(1); // samples_per_chunk;
1631 WRITE_4(1); // sample_description_index;
1632 }
1633 END_ATOM;
1634
1635 // Sample Size Box
1636 ATOM_FULL(BOX_stsz, 0);
1637 WRITE_4(0); // sample_size If this field is set to 0, then the samples have different sizes, and those sizes
1638 // are stored in the sample size table.
1639 WRITE_4(samples_count); // sample_count;
1640 for (i = 0; i < samples_count; i++)
1641 {
1642 WRITE_4(sample[i].size);
1643 }
1644 END_ATOM;
1645
1646 // Chunk Offset Box
1647 int is_64_bit = 0;
1648 if (samples_count && sample[samples_count - 1].offset > 0xffffffff)
1649 is_64_bit = 1;
1650 if (!is_64_bit)
1651 {
1652 ATOM_FULL(BOX_stco, 0);
1653 WRITE_4(samples_count);
1654 for (i = 0; i < samples_count; i++)
1655 {
1656 WRITE_4(sample[i].offset);
1657 }
1658 }
1659 else
1660 {
1661 ATOM_FULL(BOX_co64, 0);
1662 WRITE_4(samples_count);
1663 for (i = 0; i < samples_count; i++)
1664 {
1665 WRITE_4((sample[i].offset >> 32) & 0xffffffff);
1666 WRITE_4(sample[i].offset & 0xffffffff);
1667 }
1668 }
1669 END_ATOM;
1670
1671 // Sync Sample Box
1672 {
1673 int ra_count = 0;
1674 for (i = 0; i < samples_count; i++)
1675 {
1676 ra_count += !!sample[i].flag_random_access;
1677 }
1678 if (ra_count != samples_count)
1679 {
1680 // If the sync sample box is not present, every sample is a random access point.
1681 ATOM_FULL(BOX_stss, 0);
1682 WRITE_4(ra_count);
1683 for (i = 0; i < samples_count; i++)
1684 {
1685 if (sample[i].flag_random_access)
1686 {
1687 WRITE_4(i + 1);
1688 }
1689 }
1690 END_ATOM;
1691 }
1692 }
1693 END_ATOM;
1694 END_ATOM;
1695 END_ATOM;
1696 END_ATOM;
1697 } // tracks loop
1698
1699 if (mux->text_comment)
1700 {
1701 ATOM(BOX_udta);
1702 ATOM_FULL(BOX_meta, 0);
1703 ATOM_FULL(BOX_hdlr, 0);
1704 WRITE_4(0); // pre_defined
1705#define MP4E_HANDLER_TYPE_MDIR 0x6d646972
1706 WRITE_4(MP4E_HANDLER_TYPE_MDIR); // handler_type
1707 WRITE_4(0); WRITE_4(0); WRITE_4(0); // reserved[3]
1708 WRITE_4(0); // name is a null-terminated string in UTF-8 characters which gives a human-readable name for the track type (for debugging and inspection purposes).
1709 END_ATOM;
1710 ATOM(BOX_ilst);
1711 ATOM(BOX_ccmt);
1712 ATOM(BOX_data);
1713 WRITE_4(1); // type
1714 WRITE_4(0); // lang
1715 for (i = 0; i < (int)strlen(mux->text_comment) + 1; i++)
1716 {
1717 WRITE_1(mux->text_comment[i]);
1718 }
1719 END_ATOM;
1720 END_ATOM;
1721 END_ATOM;
1722 END_ATOM;
1723 END_ATOM;
1724 }
1725
1726 if (mux->enable_fragmentation)
1727 {
1728 track_t* tr = ((track_t*)mux->tracks.data) + 0;
1729 uint32_t movie_duration = get_duration(tr);
1730
1731 ATOM(BOX_mvex);
1732 ATOM_FULL(BOX_mehd, 0);
1733 WRITE_4(movie_duration); // duration
1734 END_ATOM;
1735 for (ntr = 0; ntr < ntracks; ntr++)
1736 {
1737 ATOM_FULL(BOX_trex, 0);
1738 WRITE_4(ntr + 1); // track_ID
1739 WRITE_4(1); // default_sample_description_index
1740 WRITE_4(0); // default_sample_duration
1741 WRITE_4(0); // default_sample_size
1742 WRITE_4(0); // default_sample_flags
1743 END_ATOM;
1744 }
1745 END_ATOM;
1746 }
1747 END_ATOM; // moov atom
1748
1749 assert((unsigned)(p - base) <= index_bytes);
1750
1751 err = mux->write_callback(mux->write_pos, base, p - base, mux->token);
1752 mux->write_pos += p - base;
1753 free(base);
1754 return err;
1755}
1756
1757int MP4E_close(MP4E_mux_t* mux)
1758{
1759 int err = MP4E_STATUS_OK;
1760 unsigned ntr, ntracks;
1761 if (!mux)
1763 if (!mux->enable_fragmentation)
1764 err = mp4e_flush_index(mux);
1765 if (mux->text_comment)
1766 free(mux->text_comment);
1767 ntracks = mux->tracks.bytes / sizeof(track_t);
1768 for (ntr = 0; ntr < ntracks; ntr++)
1769 {
1770 track_t* tr = ((track_t*)mux->tracks.data) + ntr;
1771 minimp4_vector_reset(&tr->vsps);
1772 minimp4_vector_reset(&tr->vpps);
1773 minimp4_vector_reset(&tr->smpl);
1774 minimp4_vector_reset(&tr->pending_sample);
1775 }
1776 minimp4_vector_reset(&mux->tracks);
1777 free(mux);
1778 return err;
1779}
1780
typedef uint32_t bs_item_t;
#define BS_BITS 32

/**
 * MSB-first bit reader over a byte buffer.
 *
 * The buffer is consumed 16 bits at a time. Input bytes are fetched one by
 * one, so the data pointer needs no particular alignment and the result does
 * not depend on host byte order; positions past `origin_bytes` read as zero.
 */
typedef struct
{
    // Look-ahead bit cache: MSB aligned, 17 bits guaranteed, zero stuffing
    unsigned int cache;

    // Bit counter = 16 - (number of bits in cache)
    // cache refilled when cache_free_bits >= 0
    int cache_free_bits;

    // Current read position (next 16-bit unit to load into the cache)
    const uint16_t* buf;

    // original data buffer
    const uint16_t* origin;

    // original data buffer length, bytes
    unsigned origin_bytes;
} bit_reader_t;


// Swap the two bytes of a 16-bit value
#define LOAD_SHORT(x) ((uint16_t)((x) << 8) | ((x) >> 8))

/** Return the next n bits (1..16) without consuming them. */
static unsigned int show_bits(bit_reader_t* bs, int n)
{
    unsigned int retval;
    assert(n > 0 && n <= 16);
    retval = (unsigned int)(bs->cache >> (32 - n));
    return retval;
}

/**
 * Discard n bits (0..16) from the cache and, when 16 or more bit positions
 * are free, load the next two input bytes (first byte most significant).
 */
static void flush_bits(bit_reader_t* bs, int n)
{
    assert(n >= 0 && n <= 16);
    bs->cache <<= n;
    bs->cache_free_bits += n;
    if (bs->cache_free_bits >= 0)
    {
        // Byte-wise load: no misaligned 16-bit access, no dependence on host
        // endianness, and bytes beyond the end of the data read as zero.
        const uint8_t* next = (const uint8_t*)bs->buf;
        size_t offset = (size_t)(next - (const uint8_t*)bs->origin);
        unsigned hi = offset < bs->origin_bytes ? next[0] : 0;
        unsigned lo = offset + 1 < bs->origin_bytes ? next[1] : 0;
        bs->cache |= (uint32_t)((hi << 8) | lo) << bs->cache_free_bits;
        bs->buf++;
        bs->cache_free_bits -= 16;
    }
}

/** Read and consume the next n bits (1..16). */
static unsigned int get_bits(bit_reader_t* bs, int n)
{
    unsigned int retval = show_bits(bs, n);
    flush_bits(bs, n);
    return retval;
}

/** Move the read position to absolute bit offset pos_bits and refill the cache. */
static void set_pos_bits(bit_reader_t* bs, unsigned pos_bits)
{
    assert((int)pos_bits >= 0);

    bs->buf = bs->origin + pos_bits / 16;
    bs->cache = 0;
    bs->cache_free_bits = 16;
    flush_bits(bs, 0);              // loads the upper 16 bits of the cache
    flush_bits(bs, pos_bits & 15);  // skips into the unit and loads the lower 16 bits
}

/** Absolute position, in bits, of the next bit to be read. */
static unsigned get_pos_bits(const bit_reader_t* bs)
{
    // Current bitbuffer position =
    // position of the next 16-bit unit in the buffer
    // minus the bits still available in the cache
    unsigned pos_bits = (unsigned)((const uint8_t*)bs->buf - (const uint8_t*)bs->origin) * 8;
    pos_bits -= 16 - bs->cache_free_bits;
    assert((int)pos_bits >= 0);
    return pos_bits;
}

/** Bits left between the read position and the end of the data (negative once past the end). */
static int remaining_bits(const bit_reader_t* bs)
{
    return bs->origin_bytes * 8 - get_pos_bits(bs);
}

/** Attach the reader to `data` (data_bytes long) and position it at bit 0. */
static void init_bits(bit_reader_t* bs, const void* data, unsigned data_bytes)
{
    bs->origin = (const uint16_t*)data;
    bs->origin_bytes = data_bytes;
    set_pos_bits(bs, 0);
}

#define GetBits(n) get_bits(bs, n)

/**
 * Read one unsigned Exp-Golomb code: count leading zero bits up to the first
 * 1 bit, then read that many suffix bits.
 *
 * @return the decoded value, or 0 when more than 31 leading zeros are seen
 *         (malformed code, or the reader ran into the zero padding past the
 *         end of the data)
 */
static int ue_bits(bit_reader_t* bs)
{
    int clz = 0;
    unsigned suffix = 0;

    while (!get_bits(bs, 1))
    {
        if (++clz == 32)
            return 0;
    }
    // get_bits() handles at most 16 bits per call: split longer suffixes
    if (clz > 16)
    {
        suffix = get_bits(bs, clz - 16) << 16;
        suffix |= get_bits(bs, 16);
    }
    else if (clz)
    {
        suffix = get_bits(bs, clz);
    }
    return (int)(((1u << clz) - 1) + suffix);
}
1882
1883#if MINIMP4_TRANSCODE_SPS_ID
1884
1888typedef struct
1889{
1890 int shift; // bit position in the cache
1891 uint32_t cache; // bit cache
1892 bs_item_t* buf; // current position
1893 bs_item_t* origin; // initial position
1894} bs_t;
1895
1896#define SWAP32(x) (uint32_t)((((x) >> 24) & 0xFF) | (((x) >> 8) & 0xFF00) | (((x) << 8) & 0xFF0000) | ((x & 0xFF) << 24))
1897
1898static void h264e_bs_put_bits(bs_t* bs, unsigned n, unsigned val)
1899{
1900 assert(!(val >> n));
1901 bs->shift -= n;
1902 assert((unsigned)n <= 32);
1903 if (bs->shift < 0)
1904 {
1905 assert(-bs->shift < 32);
1906 bs->cache |= val >> -bs->shift;
1907 *bs->buf++ = SWAP32(bs->cache);
1908 bs->shift = 32 + bs->shift;
1909 bs->cache = 0;
1910 }
1911 bs->cache |= val << bs->shift;
1912}
1913
1914static void h264e_bs_flush(bs_t* bs)
1915{
1916 *bs->buf = SWAP32(bs->cache);
1917}
1918
1919static unsigned h264e_bs_get_pos_bits(const bs_t* bs)
1920{
1921 unsigned pos_bits = (unsigned)((bs->buf - bs->origin) * BS_BITS);
1922 pos_bits += BS_BITS - bs->shift;
1923 assert((int)pos_bits >= 0);
1924 return pos_bits;
1925}
1926
1927static unsigned h264e_bs_byte_align(bs_t* bs)
1928{
1929 int pos = h264e_bs_get_pos_bits(bs);
1930 h264e_bs_put_bits(bs, -pos & 7, 0);
1931 return pos + (-pos & 7);
1932}
1933
1948static void h264e_bs_put_golomb(bs_t* bs, unsigned val)
1949{
1950 int size = 0;
1951 unsigned t = val + 1;
1952 do
1953 {
1954 size++;
1955 } while (t >>= 1);
1956
1957 h264e_bs_put_bits(bs, 2 * size - 1, val + 1);
1958}
1959
1960static void h264e_bs_init_bits(bs_t* bs, void* data)
1961{
1962 bs->origin = (bs_item_t*)data;
1963 bs->buf = bs->origin;
1964 bs->shift = BS_BITS;
1965 bs->cache = 0;
1966}
1967
/**
 * Look up a memory block in a small cache of heap copies, adding it when it
 * is not present yet.
 *
 * @param cache        array of cache_size pointers to stored copies
 * @param cache_bytes  sizes of the stored copies; 0 marks a free slot
 * @param cache_size   number of slots
 * @param mem          block to look up
 * @param bytes        size of the block
 * @return index of the slot holding an identical block (existing or newly
 *         stored), or -1 when `bytes` is not positive, no slot is free, or
 *         the copy cannot be allocated
 */
static int find_mem_cache(void* cache[], int cache_bytes[], int cache_size, void* mem, int bytes)
{
    int i;
    if (bytes <= 0)
        return -1;
    for (i = 0; i < cache_size; i++)
    {
        if (cache_bytes[i] == bytes && !memcmp(mem, cache[i], bytes))
            return i;   // found
    }
    for (i = 0; i < cache_size; i++)
    {
        if (!cache_bytes[i])
        {
            void* copy = malloc(bytes);
            if (!copy)
                return -1;  // do not report a slot that holds nothing
            memcpy(copy, mem, bytes);
            cache[i] = copy;
            cache_bytes[i] = bytes;
            return i;   // put in
        }
    }
    return -1;  // no room
}
1993
/**
 * Copy NAL unit data from `src` to `dst`, dropping every 0x03 byte that
 * follows two zero bytes and is itself followed by a byte <= 3.
 *
 * A 0x03 in that position which is the last byte, or is followed by a byte
 * greater than 3, is copied unchanged.
 *
 * @return number of bytes written to `dst`, or 0 when a byte 0..2 directly
 *         follows two zero bytes
 */
static int remove_nal_escapes(unsigned char* dst, const unsigned char* src, int h264_data_bytes)
{
    int in, out = 0, zeros = 0;
    for (in = 0; in < h264_data_bytes; in++)
    {
        if (zeros == 2 && src[in] <= 3)
        {
            if (src[in] != 3)
                return 0;
            // 00 00 03 xx with xx <= 3: skip the 03 and copy xx instead
            if (in != h264_data_bytes - 1 && src[in + 1] <= 3)
            {
                in++;
                zeros = 0;
            }
            // otherwise (trailing 03, or xx > 3): the 03 is kept as data
        }
        dst[out++] = src[in];
        zeros = src[in] ? 0 : zeros + 1;
    }
    return out;
}
2033
/**
 * Write a 4-byte start code (00 00 00 01) to `d` followed by the n bytes of
 * `s`, inserting a 0x03 byte before any byte <= 3 that follows two
 * consecutive zero bytes.
 *
 * @return total number of bytes written to `d`, start code included
 */
static int nal_put_esc(uint8_t* d, const uint8_t* s, int n)
{
    static const uint8_t start_code[4] = { 0, 0, 0, 1 };
    int written = 4, zeros = 0, k;

    memcpy(d, start_code, sizeof(start_code));
    for (k = 0; k < n; k++)
    {
        const uint8_t b = s[k];
        if (zeros == 2 && b <= 3)
        {
            d[written++] = 3; // escape byte
            zeros = 0;
        }
        zeros = b ? 0 : zeros + 1;
        d[written++] = b;
    }
    return written;
}
2057
2058static void copy_bits(bit_reader_t* bs, bs_t* bd)
2059{
2060 unsigned cb, bits;
2061 int bit_count = remaining_bits(bs);
2062 while (bit_count > 7)
2063 {
2064 cb = MINIMP4_MIN(bit_count - 7, 8);
2065 bits = GetBits(cb);
2066 h264e_bs_put_bits(bd, cb, bits);
2067 bit_count -= cb;
2068 }
2069
2070 // cut extra zeros after stop-bit
2071 bits = GetBits(bit_count);
2072 for (; bit_count && ~bits & 1; bit_count--)
2073 {
2074 bits >>= 1;
2075 }
2076 if (bit_count)
2077 {
2078 h264e_bs_put_bits(bd, bit_count, bits);
2079 }
2080}
2081
2082static int change_sps_id(bit_reader_t* bs, bs_t* bd, int new_id, int* old_id)
2083{
2084 unsigned bits, sps_id, i, bytes;
2085 for (i = 0; i < 3; i++)
2086 {
2087 bits = GetBits(8);
2088 h264e_bs_put_bits(bd, 8, bits);
2089 }
2090 sps_id = ue_bits(bs); // max = 31
2091
2092 *old_id = sps_id;
2093 sps_id = new_id;
2094 assert(sps_id <= 31);
2095
2096 h264e_bs_put_golomb(bd, sps_id);
2097 copy_bits(bs, bd);
2098
2099 bytes = h264e_bs_byte_align(bd) / 8;
2100 h264e_bs_flush(bd);
2101 return bytes;
2102}
2103
2104static int patch_pps(h264_sps_id_patcher_t* h, bit_reader_t* bs, bs_t* bd, int new_pps_id, int* old_id)
2105{
2106 int bytes;
2107 unsigned pps_id = ue_bits(bs); // max = 255
2108 unsigned sps_id = ue_bits(bs); // max = 31
2109
2110 *old_id = pps_id;
2111 sps_id = h->map_sps[sps_id];
2112 pps_id = new_pps_id;
2113
2114 assert(sps_id <= 31);
2115 assert(pps_id <= 255);
2116
2117 h264e_bs_put_golomb(bd, pps_id);
2118 h264e_bs_put_golomb(bd, sps_id);
2119 copy_bits(bs, bd);
2120
2121 bytes = h264e_bs_byte_align(bd) / 8;
2122 h264e_bs_flush(bd);
2123 return bytes;
2124}
2125
2126static void patch_slice_header(h264_sps_id_patcher_t* h, bit_reader_t* bs, bs_t* bd)
2127{
2128 unsigned first_mb_in_slice = ue_bits(bs);
2129 unsigned slice_type = ue_bits(bs);
2130 unsigned pps_id = ue_bits(bs);
2131
2132 pps_id = h->map_pps[pps_id];
2133
2134 assert(pps_id <= 255);
2135
2136 h264e_bs_put_golomb(bd, first_mb_in_slice);
2137 h264e_bs_put_golomb(bd, slice_type);
2138 h264e_bs_put_golomb(bd, pps_id);
2139 copy_bits(bs, bd);
2140}
2141
2142static int transcode_nalu(h264_sps_id_patcher_t* h, const unsigned char* src, int nalu_bytes, unsigned char* dst)
2143{
2144 int old_id;
2145
2146 bit_reader_t bst[1];
2147 bs_t bdt[1];
2148
2149 bit_reader_t bs[1];
2150 bs_t bd[1];
2151 int payload_type = src[0] & 31;
2152
2153 *dst = *src;
2154 h264e_bs_init_bits(bd, dst + 1);
2155 init_bits(bs, src + 1, nalu_bytes - 1);
2156 h264e_bs_init_bits(bdt, dst + 1);
2157 init_bits(bst, src + 1, nalu_bytes - 1);
2158
2159 switch (payload_type)
2160 {
2161 case 7:
2162 {
2163 int cb = change_sps_id(bst, bdt, 0, &old_id);
2164 int id = find_mem_cache(h->sps_cache, h->sps_bytes, MINIMP4_MAX_SPS, dst + 1, cb);
2165 if (id == -1)
2166 return 0;
2167 h->map_sps[old_id] = id;
2168 change_sps_id(bs, bd, id, &old_id);
2169 }
2170 break;
2171 case 8:
2172 {
2173 int cb = patch_pps(h, bst, bdt, 0, &old_id);
2174 int id = find_mem_cache(h->pps_cache, h->pps_bytes, MINIMP4_MAX_PPS, dst + 1, cb);
2175 if (id == -1)
2176 return 0;
2177 h->map_pps[old_id] = id;
2178 patch_pps(h, bs, bd, id, &old_id);
2179 }
2180 break;
2181 case 1:
2182 case 2:
2183 case 5:
2184 patch_slice_header(h, bs, bd);
2185 break;
2186 default:
2187 memcpy(dst, (void*)src, nalu_bytes);
2188 return nalu_bytes;
2189 }
2190
2191 nalu_bytes = 1 + h264e_bs_byte_align(bd) / 8;
2192 h264e_bs_flush(bd);
2193
2194 return nalu_bytes;
2195}
2196
2197#endif
2198
/*
 * Find the next start code: a run of two or more 0x00 bytes followed by 0x01.
 *
 * h264_data        buffer to scan
 * h264_data_bytes  number of valid bytes in the buffer
 * zcount           out: length of the start code found (zero bytes plus the
 *                  0x01 byte), or 0 when none was found
 *
 * Returns a pointer to the byte that follows the start code, or
 * h264_data + h264_data_bytes when the buffer holds no complete start code.
 *
 * Only bytes inside [h264_data, h264_data + h264_data_bytes) are read: a zero
 * run that reaches the end of the buffer is reported as "not found" instead of
 * peeking at the byte behind it.
 */
static const uint8_t* find_start_code(const uint8_t* h264_data, int h264_data_bytes, int* zcount)
{
    const uint8_t* eof = h264_data + h264_data_bytes;
    const uint8_t* p = h264_data;

    while (p < eof)
    {
        const uint8_t* zero = (const uint8_t*)memchr(p, 0, (size_t)(eof - p));
        const uint8_t* q;

        if (!zero)
        {
            break;
        }

        // Advance q to the first non-zero byte after the run (or to eof).
        q = zero + 1;
        while (q < eof && !*q)
        {
            q++;
        }

        // The terminating 0x01 must itself lie inside the buffer.
        if (q - zero >= 2 && q < eof && *q == 1)
        {
            *zcount = (int)(q - zero) + 1;
            return q + 1;
        }
        p = q;
    }

    *zcount = 0;
    return eof;
}
2227
/*
 * Locate the first NAL unit in a start-code delimited buffer.
 *
 * The unit begins right after the first start code reported by
 * find_start_code(). Its end is found by searching for the following start
 * code from there; zero bytes directly in front of that end position are
 * dropped, and the length of the second start code is subtracted.
 *
 * Returns the pointer to the unit's first byte and stores its size in
 * *pnal_unit_bytes.
 */
static const uint8_t* find_nal_unit(const uint8_t* h264_data, int h264_data_bytes, int* pnal_unit_bytes)
{
    const uint8_t* const buf_end = h264_data + h264_data_bytes;
    int code_len;
    const uint8_t* const unit = find_start_code(h264_data, h264_data_bytes, &code_len);
    const uint8_t* unit_end = unit;

    if (unit)
    {
        for (unit_end = find_start_code(unit, (int)(buf_end - unit), &code_len);
             unit_end > unit && unit_end[-1] == 0;
             unit_end--)
        {
            // trim trailing zero bytes
        }
    }

    *pnal_unit_bytes = (int)(unit_end - unit - code_len);
    return unit;
}
2249
/*
 * Prepare an H.26x writer on top of an existing muxer.
 *
 * A local MP4E_track_t is filled in (language "und", time scale 90000, default
 * duration 0, the given width and height) and passed to MP4E_add_track(); the
 * returned id and the muxer pointer are stored in the writer. The writer then
 * starts in the state "SPS, PPS and IDR still needed", with VPS needed only
 * when is_hevc is non-zero. With MINIMP4_TRANSCODE_SPS_ID enabled the SPS id
 * patcher state is cleared.
 *
 * Returns MP4E_STATUS_OK; the value returned by MP4E_add_track() is stored
 * but not checked in the visible code.
 *
 * NOTE(review): the embedded listing numbers jump 2252 -> 2254 and
 * 2257 -> 2259, so two statements are absent from this listing. No other
 * fields of `tr` are set in the visible code - confirm against the original
 * header before relying on this description.
 */
2250int mp4_h26x_write_init(mp4_h26x_writer_t* h, MP4E_mux_t* mux, int width, int height, int is_hevc)
2251{
2252 MP4E_track_t tr;
2254 tr.language[0] = 'u';
2255 tr.language[1] = 'n';
2256 tr.language[2] = 'd';
2257 tr.language[3] = 0;
2259 tr.time_scale = 90000;
2260 tr.default_duration = 0;
2261 tr.u.v.width = width;
2262 tr.u.v.height = height;
2263 h->mux_track_id = MP4E_add_track(mux, &tr);
2264 h->mux = mux;
2265
2266 h->is_hevc = is_hevc;
2267 h->need_vps = is_hevc;
2268 h->need_sps = 1;
2269 h->need_pps = 1;
2270 h->need_idr = 1;
2271#if MINIMP4_TRANSCODE_SPS_ID
2272 memset(&h->sps_patcher, 0, sizeof(h264_sps_id_patcher_t));
2273#endif
2274 return MP4E_STATUS_OK;
2275}
2276
/*
 * Cleanup routine: with MINIMP4_TRANSCODE_SPS_ID enabled, every non-NULL entry
 * of p->sps_cache[] (MINIMP4_MAX_SPS slots) and p->pps_cache[]
 * (MINIMP4_MAX_PPS slots) is freed; afterwards the whole object `h` is zeroed.
 *
 * NOTE(review): the function's signature line (listing line 2277) and the
 * declaration of `p` (listing line 2280) are absent from this listing.
 * Presumably this is the counterpart of mp4_h26x_write_init() and `p` refers
 * to h->sps_patcher - confirm against the original header.
 */
2278{
2279#if MINIMP4_TRANSCODE_SPS_ID
2281 int i;
2282 for (i = 0; i < MINIMP4_MAX_SPS; i++)
2283 {
2284 if (p->sps_cache[i])
2285 free(p->sps_cache[i]);
2286 }
2287 for (i = 0; i < MINIMP4_MAX_PPS; i++)
2288 {
2289 if (p->pps_cache[i])
2290 free(p->pps_cache[i]);
2291 }
2292#endif
// Clears the entire writer, including the cache pointers released above.
2293 memset(h, 0, sizeof(*h));
2294}
2295
/*
 * Handle one HEVC NAL unit (without start code).
 *
 * The NAL type is taken from bits 1..6 of nal[0]. Types in the range
 * HEVC_NAL_BLA_W_LP..HEVC_NAL_CRA_NUT count as intra; the first intra unit
 * seen after VPS, SPS and PPS have all been received clears need_idr.
 *
 *  - VPS / SPS / PPS units are handed to MP4E_set_vps / MP4E_set_sps /
 *    MP4E_set_pps and the matching need_* flag is cleared (the return values
 *    of those calls are not checked).
 *  - Any other unit is copied into a temporary buffer behind a 4-byte
 *    big-endian length prefix and submitted with MP4E_put_sample(); intra
 *    units are flagged MP4E_SAMPLE_RANDOM_ACCESS.
 *
 * Returns MP4E_STATUS_OK, MP4E_STATUS_NO_MEMORY when the temporary buffer
 * cannot be allocated, or the status of MP4E_put_sample().
 */
2296static int mp4_h265_write_nal(mp4_h26x_writer_t* h, const unsigned char* nal, int sizeof_nal, unsigned timeStamp90kHz_next)
2297{
2298 int payload_type = (nal[0] >> 1) & 0x3f;
2299 int is_intra = payload_type >= HEVC_NAL_BLA_W_LP && payload_type <= HEVC_NAL_CRA_NUT;
2300 int err = MP4E_STATUS_OK;
2301 //printf("payload_type=%d, intra=%d\n", payload_type, is_intra);
2302
2303 if (is_intra && !h->need_sps && !h->need_pps && !h->need_vps)
2304 h->need_idr = 0;
2305 switch (payload_type)
2306 {
2307 case HEVC_NAL_VPS:
2308 MP4E_set_vps(h->mux, h->mux_track_id, nal, sizeof_nal);
2309 h->need_vps = 0;
2310 break;
2311 case HEVC_NAL_SPS:
2312 MP4E_set_sps(h->mux, h->mux_track_id, nal, sizeof_nal);
2313 h->need_sps = 0;
2314 break;
2315 case HEVC_NAL_PPS:
2316 MP4E_set_pps(h->mux, h->mux_track_id, nal, sizeof_nal);
2317 h->need_pps = 0;
2318 break;
2319 default:
// NOTE(review): the embedded listing numbers jump 2320 -> 2322, so the statement
// controlled by this `if` is absent from the listing. As shown, the `if` would
// govern the brace block below - confirm against the original header.
2320 if (h->need_vps || h->need_sps || h->need_pps || h->need_idr)
2322 {
2323 unsigned char* tmp = (unsigned char*)malloc(4 + sizeof_nal);
2324 if (!tmp)
2325 return MP4E_STATUS_NO_MEMORY;
2326 int sample_kind = MP4E_SAMPLE_DEFAULT;
// 4-byte big-endian NAL length, followed by the NAL bytes themselves.
2327 tmp[0] = (unsigned char)(sizeof_nal >> 24);
2328 tmp[1] = (unsigned char)(sizeof_nal >> 16);
2329 tmp[2] = (unsigned char)(sizeof_nal >> 8);
2330 tmp[3] = (unsigned char)(sizeof_nal);
2331 memcpy(tmp + 4,(void*) nal, sizeof_nal);
2332 if (is_intra)
2333 sample_kind = MP4E_SAMPLE_RANDOM_ACCESS;
2334 err = MP4E_put_sample(h->mux, h->mux_track_id, tmp, 4 + sizeof_nal, timeStamp90kHz_next, sample_kind);
2335 free(tmp);
2336 }
2337 break;
2338 }
2339 return err;
2340}
2341
/*
 * Feed a buffer of start-code delimited NAL units to the writer.
 *
 * The buffer [nal, nal + length) is walked with find_nal_unit() until it
 * reports a unit of size 0. For HEVC writers each unit is forwarded to
 * mp4_h265_write_nal() through the ERR() macro (defined outside this listing).
 * For H.264, access unit delimiters (type 9) are ignored and the remaining
 * units are processed by one of two compile-time variants:
 *
 *  - MINIMP4_TRANSCODE_SPS_ID: the unit is un-escaped into nal2
 *    (remove_nal_escapes), rewritten into nal1 (transcode_nalu) and escaped
 *    back into nal2 (nal_put_esc). The calls below skip the first 4 bytes of
 *    nal2 for SPS/PPS and overwrite them with a big-endian length for
 *    samples. The result of transcode_nalu() is not checked for 0 in the
 *    visible code.
 *  - otherwise: the unit is used as is, copied behind a 4-byte length prefix
 *    for samples.
 *
 * Type 7 is stored with MP4E_set_sps, type 8 with MP4E_set_pps; other types
 * are submitted with MP4E_put_sample() once neither PPS nor IDR is pending.
 * A sample whose first_mb_in_slice is non-zero is flagged
 * MP4E_SAMPLE_CONTINUATION; otherwise type 5 is flagged
 * MP4E_SAMPLE_RANDOM_ACCESS.
 *
 * Returns MP4E_STATUS_OK, MP4E_STATUS_NO_MEMORY, or the first non-zero status
 * of MP4E_put_sample().
 *
 * NOTE(review): the embedded listing numbers jump 2381 -> 2383, 2441 -> 2443
 * and 2446 -> 2448; the statements on those lines (presumably the error
 * returns of the exit_with_free path and of the need_sps checks) are absent
 * from this listing - confirm against the original header.
 */
2342int mp4_h26x_write_nal(mp4_h26x_writer_t* h, const unsigned char* nal, int length, unsigned timeStamp90kHz_next)
2343{
2344 const unsigned char* eof = nal + length;
2345 int payload_type, sizeof_nal, err = MP4E_STATUS_OK;
2346 for (;; nal++)
2347 {
2348#if MINIMP4_TRANSCODE_SPS_ID
2349 unsigned char* nal1, * nal2;
2350#endif
2351 nal = find_nal_unit(nal, (int)(eof - nal), &sizeof_nal);
2352 if (!sizeof_nal)
2353 break;
2354 if (h->is_hevc)
2355 {
2356 ERR(mp4_h265_write_nal(h, nal, sizeof_nal, timeStamp90kHz_next));
2357 continue;
2358 }
2359 payload_type = nal[0] & 31;
2360 if (9 == payload_type)
2361 continue; // access unit delimiter, nothing to be done
2362#if MINIMP4_TRANSCODE_SPS_ID
2363 // Transcode SPS, PPS and slice headers, reassigning ID's for SPS and PPS:
2364 // - assign unique ID's to different SPS and PPS
2365 // - assign same ID's to equal (except ID) SPS and PPS
2366 // - save all different SPS and PPS
// Both work buffers are sized sizeof_nal * 17 / 16 + 32 bytes.
2367 nal1 = (unsigned char*)malloc(sizeof_nal * 17 / 16 + 32);
2368 if (!nal1)
2369 return MP4E_STATUS_NO_MEMORY;
2370 nal2 = (unsigned char*)malloc(sizeof_nal * 17 / 16 + 32);
2371 if (!nal2)
2372 {
2373 free(nal1);
2374 return MP4E_STATUS_NO_MEMORY;
2375 }
2376 sizeof_nal = remove_nal_escapes(nal2, nal, sizeof_nal);
2377 if (!sizeof_nal)
2378 {
// Shared failure path: the `goto exit_with_free` statements further down jump into this block.
2379 exit_with_free:
2380 free(nal1);
2381 free(nal2);
// NOTE(review): listing line 2382 is absent here (see header comment).
2383 }
2384
2385 sizeof_nal = transcode_nalu(&h->sps_patcher, nal2, sizeof_nal, nal1);
2386 sizeof_nal = nal_put_esc(nal2, nal1, sizeof_nal);
2387
2388 switch (payload_type) {
2389 case 7:
2390 MP4E_set_sps(h->mux, h->mux_track_id, nal2 + 4, sizeof_nal - 4);
2391 h->need_sps = 0;
2392 break;
2393 case 8:
2394 if (h->need_sps)
2395 goto exit_with_free;
2396 MP4E_set_pps(h->mux, h->mux_track_id, nal2 + 4, sizeof_nal - 4);
2397 h->need_pps = 0;
2398 break;
2399 case 5:
2400 if (h->need_sps)
2401 goto exit_with_free;
2402 h->need_idr = 0;
2403 // flow through
2404 default:
2405 if (h->need_sps)
2406 goto exit_with_free;
2407 if (!h->need_pps && !h->need_idr)
2408 {
2409 bit_reader_t bs[1];
// first_mb_in_slice is parsed from the input unit `nal`, not from the rewritten copy.
2410 init_bits(bs, nal + 1, sizeof_nal - 4 - 1);
2411 unsigned first_mb_in_slice = ue_bits(bs);
2412 //unsigned slice_type = ue_bits(bs);
2413 int sample_kind = MP4E_SAMPLE_DEFAULT;
2414 nal2[0] = (unsigned char)((sizeof_nal - 4) >> 24);
2415 nal2[1] = (unsigned char)((sizeof_nal - 4) >> 16);
2416 nal2[2] = (unsigned char)((sizeof_nal - 4) >> 8);
2417 nal2[3] = (unsigned char)((sizeof_nal - 4));
2418 if (first_mb_in_slice)
2419 sample_kind = MP4E_SAMPLE_CONTINUATION;
2420 else if (payload_type == 5)
2421 sample_kind = MP4E_SAMPLE_RANDOM_ACCESS;
2422 err = MP4E_put_sample(h->mux, h->mux_track_id, nal2, sizeof_nal, timeStamp90kHz_next, sample_kind);
2423 }
2424 break;
2425 }
2426 free(nal1);
2427 free(nal2);
2428#else
2429 // No SPS/PPS transcoding
2430 // This branch assumes that encoder use correct SPS/PPS ID's
2431 switch (payload_type) {
2432 case 7:
2433 MP4E_set_sps(h->mux, h->mux_track_id, nal, sizeof_nal);
2434 h->need_sps = 0;
2435 break;
2436 case 8:
2437 MP4E_set_pps(h->mux, h->mux_track_id, nal, sizeof_nal);
2438 h->need_pps = 0;
2439 break;
2440 case 5:
// NOTE(review): listing line 2442 (the statement controlled by this `if`) is absent.
2441 if (h->need_sps)
2443 h->need_idr = 0;
2444 // flow through
2445 default:
// NOTE(review): listing line 2447 (the statement controlled by this `if`) is absent.
2446 if (h->need_sps)
2448 if (!h->need_pps && !h->need_idr)
2449 {
2450 bit_reader_t bs[1];
2451 unsigned char* tmp = (unsigned char*)malloc(4 + sizeof_nal);
2452 if (!tmp)
2453 return MP4E_STATUS_NO_MEMORY;
2454 init_bits(bs, nal + 1, sizeof_nal - 1);
2455 unsigned first_mb_in_slice = ue_bits(bs);
2456 int sample_kind = MP4E_SAMPLE_DEFAULT;
2457 tmp[0] = (unsigned char)(sizeof_nal >> 24);
2458 tmp[1] = (unsigned char)(sizeof_nal >> 16);
2459 tmp[2] = (unsigned char)(sizeof_nal >> 8);
2460 tmp[3] = (unsigned char)(sizeof_nal);
2461 memcpy(tmp + 4, nal, sizeof_nal);
2462 if (first_mb_in_slice)
2463 sample_kind = MP4E_SAMPLE_CONTINUATION;
2464 else if (payload_type == 5)
2465 sample_kind = MP4E_SAMPLE_RANDOM_ACCESS;
2466 err = MP4E_put_sample(h->mux, h->mux_track_id, tmp, 4 + sizeof_nal, timeStamp90kHz_next, sample_kind);
2467 free(tmp);
2468 }
2469 break;
2470 }
2471#endif
2472 if (err)
2473 break;
2474 }
2475 return err;
2476}
2477
// TRACE((fmt, ...)): debug output hook. The argument is a complete, parenthesised
// printf argument list; it is pasted after `printf` when MP4D_TRACE_SUPPORTED is
// non-zero and disappears entirely (arguments are not evaluated) otherwise.
2478#if MP4D_TRACE_SUPPORTED
2479# define TRACE(x) printf x
2480#else
2481# define TRACE(x)
2482#endif
2483
// NELEM(x): element count of an array object. Only meaningful for true arrays;
// applied to a pointer it yields sizeof(pointer) / sizeof(element).
2484#define NELEM(x) (sizeof(x) / sizeof((x)[0]))
2485
2486static int minimp4_fgets(MP4D_demux_t* mp4)
2487{
2488 uint8_t c;
2489 if (mp4->read_callback(mp4->read_pos, &c, 1, mp4->token))
2490 return -1;
2491 mp4->read_pos++;
2492 return c;
2493}
2494
2499static unsigned minimp4_read(MP4D_demux_t* mp4, int nb, int* eof_flag)
2500{
2501 uint32_t v = 0; int last_byte;
2502 switch (nb)
2503 {
2504 case 4: v = (v << 8) | minimp4_fgets(mp4);
2505 case 3: v = (v << 8) | minimp4_fgets(mp4);
2506 case 2: v = (v << 8) | minimp4_fgets(mp4);
2507 default:
2508 case 1: v = (v << 8) | (last_byte = minimp4_fgets(mp4));
2509 }
2510 if (last_byte < 0)
2511 {
2512 *eof_flag = 1;
2513 }
2514 return v;
2515}
2516
2521static uint32_t read_payload(MP4D_demux_t* mp4, unsigned nb, boxsize_t* payload_bytes, int* eof_flag)
2522{
2523 if (*payload_bytes < nb)
2524 {
2525 *eof_flag = 1;
2526 nb = (int)*payload_bytes;
2527 }
2528 *payload_bytes -= nb;
2529
2530 return minimp4_read(mp4, nb, eof_flag);
2531}
2532
2537static void my_fseek(MP4D_demux_t* mp4, boxsize_t pos, int* eof_flag)
2538{
2539 mp4->read_pos += pos;
2540 if (mp4->read_pos >= mp4->read_size)
2541 *eof_flag = 1;
2542}
2543
// Helper macros for the box parser. They expand in a scope that must provide the
// locals `mp4`, `payload_bytes`, `eof_flag` and (for ERROR / MALLOC) `depth`.
//
// READ(n)   - read n payload bytes as a big-endian value via read_payload().
// SKIP(n)   - skip min(payload_bytes, n) bytes via my_fseek() and reduce payload_bytes
//             accordingly; `n` is evaluated more than once (MINIMP4_MIN).
// MALLOC(t, p, size) - p = (t)malloc(size); on failure falls into ERROR("out of memory").
//             Expands to two statements, so it must not be used as the unbraced body
//             of an if/else/loop.
2544#define READ(n) read_payload(mp4, n, &payload_bytes, &eof_flag)
2545#define SKIP(n) { boxsize_t t = MINIMP4_MIN(payload_bytes, n); my_fseek(mp4, t, &eof_flag); payload_bytes -= t; }
2546#define MALLOC(t, p, size) p = (t)malloc(size); if (!(p)) { ERROR("out of memory"); }
2547
2548/*
2549* On error: release resources.
2550*/
// RETURN_ERROR(mess): trace the message, call MP4D_close(mp4) and return 0 from the
// enclosing function.
2551#define RETURN_ERROR(mess) { \
2552 TRACE(("\nMP4 ERROR: " mess)); \
2553 MP4D_close(mp4); \
2554 return 0; \
2555}
2556
2557/*
2558* Any errors, occurred on top-level hierarchy is passed to exit check: 'if (!mp4->track_count) ... '
2559*/
// ERROR(mess): at depth 0 executes a bare `break`, otherwise behaves like RETURN_ERROR.
// The `break` leaves the innermost enclosing loop or switch at the expansion site, so
// inside a nested `for` or `switch` it does not leave the outer parsing loop. Because
// of that bare `break` the macro cannot be wrapped in do { } while (0).
2560#define ERROR(mess) \
2561 if (!depth) \
2562 break; \
2563 else \
2564 RETURN_ERROR(mess);
2565
// Format of the children of a box on the parser stack: regular atoms, or
// object-descriptor (OD) chunks as used below 'esds'.
2566typedef enum { BOX_ATOM, BOX_OD } boxtype_t;
2567
/*
 * Parse the box structure of an MP4 file and fill in the demuxer state.
 *
 * mp4            demuxer object; it is zeroed first, so any previous content is lost
 * read_callback  reader used for all file access (see minimp4_fgets)
 * token          opaque value stored in mp4->token and passed to the callback
 * file_size      stored in mp4->read_size and used as end-of-file limit
 *
 * Returns 1 when at least one 'trak' box was found, 0 otherwise. 0 is also
 * returned for NULL mp4 / read_callback, and from every RETURN_ERROR path
 * (which calls MP4D_close(mp4) first).
 *
 * The parser keeps an explicit stack of open container boxes
 * (stack[0..depth]); each entry holds the bytes still unread in that
 * container and whether its children are atoms or OD chunks. One iteration of
 * the main loop reads one box header, validates it against the parent,
 * extracts the fields this library needs, then either descends (envelope
 * boxes) or skips the remaining payload.
 *
 * ERROR() only returns when depth > 0; at depth 0 it executes `break`, which
 * leaves the innermost enclosing loop or switch at that point.
 *
 * NOTE(review): the embedded listing numbers jump 2598 -> 2600, 2864 -> 2866,
 * 2921 -> 2923 and 3051 -> 3053, so four statements are absent from this
 * listing; the affected places are marked below.
 */
2568int MP4D_open(MP4D_demux_t* mp4, int (*read_callback)(int64_t offset, void* buffer, size_t size, void* token), void* token, int64_t file_size)
2569{
2570 // box stack size
2571 int depth = 0;
2572
2573 struct
2574 {
2575 // remaining bytes for box in the stack
2576 boxsize_t bytes;
2577
2578 // kind of box children's: OD chunks handled in the same manner as name chunks
2579 boxtype_t format;
2580
2581 } stack[MAX_CHUNKS_DEPTH];
2582
2583#if MP4D_TRACE_SUPPORTED
2584 // path of current element: List0/List1/... etc
2585 uint32_t box_path[MAX_CHUNKS_DEPTH];
2586#endif
2587
2588 int eof_flag = 0;
2589 unsigned i;
2590 MP4D_track_t* tr = NULL;
2591
2592 if (!mp4 || !read_callback)
2593 {
2594 TRACE(("\nERROR: invlaid arguments!"));
2595 return 0;
2596 }
2597
2598 memset(mp4, 0, sizeof(MP4D_demux_t));
// NOTE(review): listing line 2599 is absent. The visible code never stores
// read_callback into mp4, although minimp4_fgets() calls mp4->read_callback -
// presumably that assignment is the missing line; confirm.
2600 mp4->token = token;
2601 mp4->read_size = file_size;
2602
2603 stack[0].format = BOX_ATOM; // start with atom box
2604 stack[0].bytes = 0; // never accessed
2605
2606 do
2607 {
2608 // List of boxes, derived from 'FullBox'
2609 // ~~~~~~~~~~~~~~~~~~~~~
2610 // need read version field and check version for these boxes
2611 static const struct
2612 {
2613 uint32_t name;
2614 unsigned max_version;
2615 unsigned use_track_flag;
2616 } g_fullbox[] =
2617 {
2618#if MP4D_INFO_SUPPORTED
2619 {BOX_mdhd, 1, 1},
2620 {BOX_mvhd, 1, 0},
2621 {BOX_hdlr, 0, 0},
2622 {BOX_meta, 0, 0}, // Android can produce meta box without 'FullBox' field, comment this line to simulate the bug
2623#endif
2624#if MP4D_TRACE_TIMESTAMPS
2625 {BOX_stts, 0, 0},
2626 {BOX_ctts, 0, 0},
2627#endif
2628 {BOX_stz2, 0, 1},
2629 {BOX_stsz, 0, 1},
2630 {BOX_stsc, 0, 1},
2631 {BOX_stco, 0, 1},
2632 {BOX_co64, 0, 1},
2633 {BOX_stsd, 0, 0},
2634 {BOX_esds, 0, 1} // esds does not use track, but switches to OD mode. Check here, to avoid OD check
2635 };
2636
2637 // List of boxes, which contains other boxes ('envelopes')
2638 // Parser will descend down for boxes in this list, otherwise parsing will proceed to
2639 // the next sibling box
2640 // OD boxes handled in the same way as atom boxes...
2641 static const struct
2642 {
2643 uint32_t name;
2644 boxtype_t type;
2645 } g_envelope_box[] =
2646 {
2647 {BOX_esds, BOX_OD}, // TODO: BOX_esds can be used for both audio and video, but this code supports audio only!
2648 {OD_ESD, BOX_OD},
2649 {OD_DCD, BOX_OD},
2650 {OD_DSI, BOX_OD},
2651 {BOX_trak, BOX_ATOM},
2652 {BOX_moov, BOX_ATOM},
2653 //{BOX_moof, BOX_ATOM},
2654 {BOX_mdia, BOX_ATOM},
2655 {BOX_tref, BOX_ATOM},
2656 {BOX_minf, BOX_ATOM},
2657 {BOX_dinf, BOX_ATOM},
2658 {BOX_stbl, BOX_ATOM},
2659 {BOX_stsd, BOX_ATOM},
2660 {BOX_mp4a, BOX_ATOM},
2661 {BOX_mp4s, BOX_ATOM},
2662#if MP4D_AVC_SUPPORTED
2663 {BOX_mp4v, BOX_ATOM},
2664 {BOX_avc1, BOX_ATOM},
2665 //{BOX_avc2, BOX_ATOM},
2666 //{BOX_svc1, BOX_ATOM},
2667#endif
2668#if MP4D_HEVC_SUPPORTED
2669 {BOX_hvc1, BOX_ATOM},
2670#endif
2671 {BOX_udta, BOX_ATOM},
2672 {BOX_meta, BOX_ATOM},
2673 {BOX_ilst, BOX_ATOM}
2674 };
2675
2676 uint32_t FullAtomVersionAndFlags = 0;
2677 boxsize_t payload_bytes;
2678 boxsize_t box_bytes;
2679 uint32_t box_name;
2680#if MP4D_INFO_SUPPORTED
2681 unsigned char** ptag = NULL;
2682#endif
2683 int read_bytes = 0;
2684
2685 // Read header box type and its length
2686 if (stack[depth].format == BOX_ATOM)
2687 {
2688 box_bytes = minimp4_read(mp4, 4, &eof_flag);
2689#if FIX_BAD_ANDROID_META_BOX
// Re-entry point: the Android 'meta' workaround below jumps back here with
// box_bytes already holding the size of the first child box.
2690 broken_android_meta_hack :
2691#endif
2692 if (eof_flag)
2693 break; // normal exit
2694
2695 if (box_bytes >= 2 && box_bytes < 8)
2696 {
2697 ERROR("invalid box size (broken file?)");
2698 }
2699
2700 box_name = minimp4_read(mp4, 4, &eof_flag);
2701 read_bytes = 8;
2702
2703 // Decode box size
2704 if (box_bytes == 0 || // standard indication of 'till eof' size
2705 box_bytes == (boxsize_t)0xFFFFFFFFU // some files uses non-standard 'till eof' signaling
2706 )
2707 {
2708 box_bytes = ~(boxsize_t)0;
2709 }
2710
2711 payload_bytes = box_bytes - 8;
2712
2713 if (box_bytes == 1) // 64-bit sizes
2714 {
2715 TRACE(("\n64-bit chunk encountered"));
2716
2717 box_bytes = minimp4_read(mp4, 4, &eof_flag);
2718#if MP4D_64BIT_SUPPORTED
2719 box_bytes <<= 32;
2720 box_bytes |= minimp4_read(mp4, 4, &eof_flag);
2721#else
2722 if (box_bytes)
2723 {
2724 ERROR("UNSUPPORTED FEATURE: MP4BoxHeader(): 64-bit boxes not supported!");
2725 }
2726 box_bytes = minimp4_read(mp4, 4, &eof_flag);
2727#endif
2728 if (box_bytes < 16)
2729 {
2730 ERROR("invalid box size (broken file?)");
2731 }
2732 payload_bytes = box_bytes - 16;
2733 }
2734
2735 // Read and check box version for some boxes
// Note: ERROR() inside this `for` executes `break` at depth 0, which ends only
// this lookup loop.
2736 for (i = 0; i < NELEM(g_fullbox); i++)
2737 {
2738 if (box_name == g_fullbox[i].name)
2739 {
2740 FullAtomVersionAndFlags = READ(4);
2741 read_bytes += 4;
2742
2743#if FIX_BAD_ANDROID_META_BOX
2744 // Fix invalid BOX_meta, found in some Android-produced MP4
2745 // This branch is optional: bad box would be skipped
2746 if (box_name == BOX_meta)
2747 {
// Heuristic: if the would-be version/flags word looks like a plausible child box
// size, treat the 'meta' box as having no FullBox header.
2748 if (FullAtomVersionAndFlags >= 8 && FullAtomVersionAndFlags < payload_bytes)
2749 {
2750 if (box_bytes > stack[depth].bytes)
2751 {
2752 ERROR("broken file structure!");
2753 }
2754 stack[depth].bytes -= box_bytes;;
2755 depth++;
2756 stack[depth].bytes = payload_bytes + 4; // +4 need for missing header
2757 stack[depth].format = BOX_ATOM;
2758 box_bytes = FullAtomVersionAndFlags;
2759 TRACE(("Bad metadata box detected (Android bug?)!\n"));
2760 goto broken_android_meta_hack;
2761 }
2762 }
2763#endif // FIX_BAD_ANDROID_META_BOX
2764
2765 if ((FullAtomVersionAndFlags >> 24) > g_fullbox[i].max_version)
2766 {
2767 ERROR("unsupported box version!");
2768 }
2769 if (g_fullbox[i].use_track_flag && !tr)
2770 {
2771 ERROR("broken file structure!");
2772 }
2773 }
2774 }
2775 }
2776 else // stack[depth].format == BOX_OD
2777 {
2778 int val;
2779 box_name = OD_BASE + minimp4_read(mp4, 1, &eof_flag); // 1-byte box type
2780 read_bytes += 1;
2781 if (eof_flag)
2782 break;
2783
// OD chunk size: 7 bits per byte, most significant group first; bit 7 set means
// another size byte follows.
2784 payload_bytes = 0;
2785 box_bytes = 1;
2786 do
2787 {
2788 val = minimp4_read(mp4, 1, &eof_flag);
2789 read_bytes += 1;
2790 if (eof_flag)
2791 {
2792 ERROR("premature EOF!");
2793 }
2794 payload_bytes = (payload_bytes << 7) | (val & 0x7F);
2795 box_bytes++;
2796 } while (val & 0x80);
2797 box_bytes += payload_bytes;
2798 }
2799
2800#if MP4D_TRACE_SUPPORTED
2801 box_path[depth] = (box_name >> 24) | (box_name << 24) | ((box_name >> 8) & 0x0000FF00) | ((box_name << 8) & 0x00FF0000);
2802 TRACE(("%2d %8d %.*s (%d bytes remains for sibilings) \n", depth, (int)box_bytes, depth * 4, (char*)box_path, (int)stack[depth].bytes));
2803#endif
2804
2805 // Check that box size <= parent size
2806 if (depth)
2807 {
2808 // Skip box with bad size
2809 assert(box_bytes > 0);
2810 if (box_bytes > stack[depth].bytes)
2811 {
2812 TRACE(("Wrong %c%c%c%c box size: broken file?\n", (box_name >> 24) & 255, (box_name >> 16) & 255, (box_name >> 8) & 255, box_name & 255));
// Clamp to what the parent has left and clear the name so that the box is only skipped.
2813 box_bytes = stack[depth].bytes;
2814 box_name = 0;
2815 payload_bytes = box_bytes - read_bytes;
2816 }
2817 stack[depth].bytes -= box_bytes;
2818 }
2819
2820 // Read box header
2821 switch (box_name)
2822 {
2823 case BOX_stz2: //ISO/IEC 14496-1 Page 38. Section 8.17.2 - Sample Size Box.
2824 case BOX_stsz:
2825 {
2826 int size = 0;
2827 uint32_t sample_size = READ(4);
2828 tr->sample_count = READ(4);
// NOTE(review): sample_count comes straight from the file; the product
// sample_count * 4 is not checked for overflow here.
2829 MALLOC(unsigned int*, tr->entry_size, tr->sample_count * 4);
2830 for (i = 0; i < tr->sample_count; i++)
2831 {
2832 if (box_name == BOX_stsz)
2833 {
2834 tr->entry_size[i] = (sample_size ? sample_size : READ(4));
2835 }
2836 else
2837 {
// 'stz2': the low byte of the first word selects the field width (16, 8 or 4 bits);
// for 4 bits two entries share one byte, high nibble first.
2838 switch (sample_size & 0xFF)
2839 {
2840 case 16:
2841 tr->entry_size[i] = READ(2);
2842 break;
2843 case 8:
2844 tr->entry_size[i] = READ(1);
2845 break;
2846 case 4:
2847 if (i & 1)
2848 {
2849 tr->entry_size[i] = size & 15;
2850 }
2851 else
2852 {
2853 size = READ(1);
2854 tr->entry_size[i] = (size >> 4);
2855 }
2856 break;
2857 }
2858 }
2859 }
2860 }
2861 break;
2862
2863 case BOX_stsc: //ISO/IEC 14496-12 Page 38. Section 8.18 - Sample To Chunk Box.
2864 tr->sample_to_chunk_count = READ(4);
// NOTE(review): listing line 2865 is absent; the loop below writes
// tr->sample_to_chunk[], so presumably its allocation is the missing line - confirm.
2866 for (i = 0; i < tr->sample_to_chunk_count; i++)
2867 {
2868 tr->sample_to_chunk[i].first_chunk = READ(4);
2869 tr->sample_to_chunk[i].samples_per_chunk = READ(4);
2870 SKIP(4); // sample_description_index
2871 }
2872 break;
2873#if MP4D_TRACE_TIMESTAMPS || MP4D_TIMESTAMPS_SUPPORTED
2874 case BOX_stts:
2875 {
2876 unsigned count = READ(4);
2877 unsigned j, k = 0, ts = 0, ts_count = count;
2878#if MP4D_TIMESTAMPS_SUPPORTED
2879 MALLOC(unsigned int*, tr->timestamp, ts_count * 4);
2880 MALLOC(unsigned int*, tr->duration, ts_count * 4);
2881#endif
2882
// Each entry (sc, d) expands to `sc` consecutive samples of duration `d`;
// timestamps are the running sum of the durations.
2883 for (i = 0; i < count; i++)
2884 {
2885 unsigned sc = READ(4);
2886 int d = READ(4);
2887 TRACE(("sample %8d count %8d duration %8d\n", i, sc, d));
2888#if MP4D_TIMESTAMPS_SUPPORTED
2889 if (k + sc > ts_count)
2890 {
2891 ts_count = k + sc;
// NOTE(review): the realloc() results are assigned directly and not checked for
// NULL before the arrays are written below.
2892 tr->timestamp = (unsigned int*)realloc(tr->timestamp, ts_count * sizeof(unsigned));
2893 tr->duration = (unsigned int*)realloc(tr->duration, ts_count * sizeof(unsigned));
2894 }
2895 for (j = 0; j < sc; j++)
2896 {
2897 tr->duration[k] = d;
2898 tr->timestamp[k++] = ts;
2899 ts += d;
2900 }
2901#endif
2902 }
2903 }
2904 break;
2905 case BOX_ctts:
2906 {
// The entries are read (and traced) but not stored.
2907 unsigned count = READ(4);
2908 for (i = 0; i < count; i++)
2909 {
2910 int sc = READ(4);
2911 int d = READ(4);
2912 (void)sc;
2913 (void)d;
2914 TRACE(("sample %8d count %8d decoding to composition offset %8d\n", i, sc, d));
2915 }
2916 }
2917 break;
2918#endif
2919 case BOX_stco: //ISO/IEC 14496-12 Page 39. Section 8.19 - Chunk Offset Box.
2920 case BOX_co64:
2921 tr->chunk_count = READ(4);
// NOTE(review): listing line 2922 is absent; the loop below writes
// tr->chunk_offset[], so presumably its allocation is the missing line - confirm.
2923 for (i = 0; i < tr->chunk_count; i++)
2924 {
2925 tr->chunk_offset[i] = READ(4);
2926 if (box_name == BOX_co64)
2927 {
2928#if !MP4D_64BIT_SUPPORTED
2929 if (tr->chunk_offset[i])
2930 {
2931 ERROR("UNSUPPORTED FEATURE: 64-bit chunk_offset not supported!");
2932 }
2933#endif
2934 tr->chunk_offset[i] <<= 32;
2935 tr->chunk_offset[i] |= READ(4);
2936 }
2937 }
2938 break;
2939
2940#if MP4D_INFO_SUPPORTED
2941 case BOX_mvhd:
// Version 1 boxes carry 64-bit creation/modification times and a 64-bit duration.
2942 SKIP(((FullAtomVersionAndFlags >> 24) == 1) ? 8 + 8 : 4 + 4);
2943 mp4->timescale = READ(4);
2944 mp4->duration_hi = ((FullAtomVersionAndFlags >> 24) == 1) ? READ(4) : 0;
2945 mp4->duration_lo = READ(4);
2946 SKIP(4 + 2 + 2 + 4 * 2 + 4 * 9 + 4 * 6 + 4);
2947 break;
2948
2949 case BOX_mdhd:
2950 SKIP(((FullAtomVersionAndFlags >> 24) == 1) ? 8 + 8 : 4 + 4);
2951 tr->timescale = READ(4);
2952 tr->duration_hi = ((FullAtomVersionAndFlags >> 24) == 1) ? READ(4) : 0;
2953 tr->duration_lo = READ(4);
2954
2955 {
// Three 5-bit values, each offset by 0x60, give the three language letters.
2956 int ISO_639_2_T = READ(2);
2957 tr->language[2] = (ISO_639_2_T & 31) + 0x60; ISO_639_2_T >>= 5;
2958 tr->language[1] = (ISO_639_2_T & 31) + 0x60; ISO_639_2_T >>= 5;
2959 tr->language[0] = (ISO_639_2_T & 31) + 0x60;
2960 }
2961 // the rest of this box is skipped by default ...
2962 break;
2963
2964 case BOX_hdlr:
2965 if (tr) // When this box is within 'meta' box, the track may not be available
2966 {
2967 SKIP(4); // pre_defined
2968 tr->handler_type = READ(4);
2969 }
2970 // typically hdlr box does not contain any useful info.
2971 // the rest of this box is skipped by default ...
2972 break;
2973
2974 case BOX_btrt:
2975 if (!tr)
2976 {
2977 ERROR("broken file structure!");
2978 }
2979
2980 SKIP(4 + 4);
2981 tr->avg_bitrate_bps = READ(4);
2982 break;
2983
2984 // Set pointer to tag to be read...
2985 case BOX_calb: ptag = &mp4->tag.album; break;
2986 case BOX_cART: ptag = &mp4->tag.artist; break;
2987 case BOX_cnam: ptag = &mp4->tag.title; break;
2988 case BOX_cday: ptag = &mp4->tag.year; break;
2989 case BOX_ccmt: ptag = &mp4->tag.comment; break;
2990 case BOX_cgen: ptag = &mp4->tag.genre; break;
2991
2992#endif
2993
2994 case BOX_stsd:
2995 SKIP(4); // entry_count, BOX_mp4a & BOX_mp4v boxes follows immediately
2996 break;
2997
2998 case BOX_mp4s: // private stream
2999 if (!tr)
3000 {
3001 ERROR("broken file structure!");
3002 }
3003 SKIP(6 * 1 + 2/*Base SampleEntry*/);
3004 break;
3005
3006 case BOX_mp4a:
3007 if (!tr)
3008 {
3009 ERROR("broken file structure!");
3010 }
3011#if MP4D_INFO_SUPPORTED
3012 SKIP(6 * 1 + 2/*Base SampleEntry*/ + 4 * 2);
3013 tr->SampleDescription.audio.channelcount = READ(2);
3014 SKIP(2/*samplesize*/ + 2 + 2);
3015 tr->SampleDescription.audio.samplerate_hz = READ(4) >> 16;
3016#else
3017 SKIP(28);
3018#endif
3019 break;
3020
3021#if MP4D_AVC_SUPPORTED
3022 case BOX_avc1: // AVCSampleEntry extends VisualSampleEntry
3023// case BOX_avc2: - no test
3024// case BOX_svc1: - no test
3025 case BOX_mp4v:
3026 if (!tr)
3027 {
3028 ERROR("broken file structure!");
3029 }
3030#if MP4D_INFO_SUPPORTED
3031 SKIP(6 * 1 + 2/*Base SampleEntry*/ + 2 + 2 + 4 * 3);
3032 tr->SampleDescription.video.width = READ(2);
3033 tr->SampleDescription.video.height = READ(2);
3034 // frame_count is always 1
3035 // compressorname is rarely set..
3036 SKIP(4 + 4 + 4 + 2/*frame_count*/ + 32/*compressorname*/ + 2 + 2);
3037#else
3038 SKIP(78);
3039#endif
3040 // ^^^ end of VisualSampleEntry
3041 // now follows for BOX_avc1:
3042 // BOX_avcC
3043 // BOX_btrt (optional)
3044 // BOX_m4ds (optional)
3045 // for BOX_mp4v:
3046 // BOX_esds
3047 break;
3048
3049 case BOX_avcC: // AVCDecoderConfigurationRecord()
3050 // hack: AAC-specific DSI field reused (for it have same purpose as sps/pps)
3051 // TODO: check this hack if BOX_esds co-exist with BOX_avcC
// NOTE(review): listing line 3052 is absent. In the visible code `tr` is not
// checked for NULL in this case, the malloc() result is not checked before `p`
// is written, and the copy loops are bounded only by the counts and lengths
// read from the file - confirm against the original header.
3053 tr->dsi = (unsigned char*)malloc((size_t)box_bytes);
3054 tr->dsi_bytes = (unsigned)box_bytes;
3055 {
3056 int spspps;
3057 unsigned char* p = tr->dsi;
3058 unsigned int configurationVersion = READ(1);
3059 unsigned int AVCProfileIndication = READ(1);
3060 unsigned int profile_compatibility = READ(1);
3061 unsigned int AVCLevelIndication = READ(1);
3062 //bit(6) reserved =
3063 unsigned int lengthSizeMinusOne = READ(1) & 3;
3064
3065 (void)configurationVersion;
3066 (void)AVCProfileIndication;
3067 (void)profile_compatibility;
3068 (void)AVCLevelIndication;
3069 (void)lengthSizeMinusOne;
3070
// Stored layout in tr->dsi: [count][len_hi len_lo bytes...]* for the SPS list,
// followed by the same structure for the PPS list (see skip_spspps).
3071 for (spspps = 0; spspps < 2; spspps++)
3072 {
3073 unsigned int numOfSequenceParameterSets = READ(1);
3074 if (!spspps)
3075 {
3076 numOfSequenceParameterSets &= 31; // clears 3 msb for SPS
3077 }
3078 *p++ = numOfSequenceParameterSets;
3079 for (i = 0; i < numOfSequenceParameterSets; i++)
3080 {
3081 unsigned k, sequenceParameterSetLength = READ(2);
3082 *p++ = sequenceParameterSetLength >> 8;
3083 *p++ = sequenceParameterSetLength;
3084 for (k = 0; k < sequenceParameterSetLength; k++)
3085 {
3086 *p++ = READ(1);
3087 }
3088 }
3089 }
3090 }
3091 break;
3092#endif // MP4D_AVC_SUPPORTED
3093
3094 case OD_ESD:
3095 {
3096 unsigned flags = READ(3); // ES_ID(2) + flags(1)
3097
3098 if (flags & 0x80) // streamdependflag
3099 {
3100 SKIP(2); // dependsOnESID
3101 }
3102 if (flags & 0x40) // urlflag
3103 {
3104 unsigned bytecount = READ(1);
3105 SKIP(bytecount); // skip URL
3106 }
3107 if (flags & 0x20) // ocrflag (was reserved in MPEG-4 v.1)
3108 {
3109 SKIP(2); // OCRESID
3110 }
3111 break;
3112 }
3113
3114 case OD_DCD: //ISO/IEC 14496-1 Page 28. Section 8.6.5 - DecoderConfigDescriptor.
3115 assert(tr); // ensured by g_fullbox[] check
3116 tr->object_type_indication = READ(1);
3117#if MP4D_INFO_SUPPORTED
3118 tr->stream_type = READ(1) >> 2;
3119 SKIP(3/*bufferSizeDB*/ + 4/*maxBitrate*/);
3120 tr->avg_bitrate_bps = READ(4);
3121#else
3122 SKIP(1 + 3 + 4 + 4);
3123#endif
3124 break;
3125
3126 case OD_DSI: //ISO/IEC 14496-1 Page 28. Section 8.6.5 - DecoderConfigDescriptor.
3127 assert(tr); // ensured by g_fullbox[] check
3128 if (!tr->dsi && payload_bytes)
3129 {
3130 MALLOC(unsigned char*, tr->dsi, (int)payload_bytes);
3131 for (i = 0; i < payload_bytes; i++)
3132 {
3133 tr->dsi[i] = minimp4_read(mp4, 1, &eof_flag); // These bytes available due to check above
3134 }
3135 tr->dsi_bytes = i;
3136 payload_bytes -= i;
3137 break;
3138 }
// When the condition above is false, control falls through into `default`.
3139
3140 default:
3141 TRACE(("[%c%c%c%c] %d\n", box_name >> 24, box_name >> 16, box_name >> 8, box_name, (int)payload_bytes));
3142 }
3143
3144#if MP4D_INFO_SUPPORTED
3145 // Read tag if tag pointer is set
3146 if (ptag && !*ptag && payload_bytes > 16)
3147 {
3148#if 0
3149 uint32_t size = READ(4);
3150 uint32_t data = READ(4);
3151 uint32_t class = READ(4);
3152 uint32_t x1 = READ(4);
3153 TRACE(("%2d %2d %2d ", size, class, x1));
3154#else
3155 SKIP(4 + 4 + 4 + 4);
3156#endif
3157 MALLOC(unsigned char*, *ptag, (unsigned)payload_bytes + 1);
3158 for (i = 0; payload_bytes != 0; i++)
3159 {
3160 (*ptag)[i] = READ(1);
3161 }
3162 (*ptag)[i] = 0; // zero-terminated string
3163 }
3164#endif
3165
3166 if (box_name == BOX_trak)
3167 {
3168 // New track found: allocate memory using realloc()
3169 // Typically there are 1 audio track for AAC audio file,
3170 // 4 tracks for movie file,
3171 // 3-5 tracks for scalable audio (CELP+AAC)
3172 // and up to 50 tracks for BSAC scalable audio
3173 void* mem = realloc(mp4->track, (mp4->track_count + 1) * sizeof(MP4D_track_t));
3174 if (!mem)
3175 {
3176 // if realloc fails, it does not deallocate old pointer!
3177 ERROR("out of memory");
3178 }
3179 mp4->track = (MP4D_track_t*)mem;
3180 tr = mp4->track + mp4->track_count++;
3181 memset(tr, 0, sizeof(MP4D_track_t));
3182 }
3183 else if (box_name == BOX_meta)
3184 {
3185 tr = NULL; // Avoid update of 'hdlr' box, which may be contained in the 'meta' box
3186 }
3187
3188 // If this box is envelope, save its size in box stack
3189 for (i = 0; i < NELEM(g_envelope_box); i++)
3190 {
3191 if (box_name == g_envelope_box[i].name)
3192 {
3193 if (++depth >= MAX_CHUNKS_DEPTH)
3194 {
3195 ERROR("too deep atoms nesting!");
3196 }
3197 stack[depth].bytes = payload_bytes;
3198 stack[depth].format = g_envelope_box[i].type;
3199 break;
3200 }
3201 }
3202
3203 // if box is not envelope, just skip it
3204 if (i == NELEM(g_envelope_box))
3205 {
3206 if (payload_bytes > file_size)
3207 {
3208 eof_flag = 1;
3209 }
3210 else
3211 {
3212 SKIP(payload_bytes);
3213 }
3214 }
3215
3216 // remove empty boxes from stack
3217 // don't touch box with index 0 (which indicates whole file)
3218 while (depth > 0 && !stack[depth].bytes)
3219 {
3220 depth--;
3221 }
3222
3223 } while (!eof_flag);
3224
3225 if (!mp4->track_count)
3226 {
3227 RETURN_ERROR("no tracks found");
3228 }
3229 return 1;
3230}
3231
3236static int sample_to_chunk(MP4D_track_t* tr, unsigned nsample, unsigned* nfirst_sample_in_chunk)
3237{
3238 unsigned chunk_group = 0, nc;
3239 unsigned sum = 0;
3240 *nfirst_sample_in_chunk = 0;
3241 if (tr->chunk_count <= 1)
3242 {
3243 return 0;
3244 }
3245 for (nc = 0; nc < tr->chunk_count; nc++)
3246 {
3247 if (chunk_group + 1 < tr->sample_to_chunk_count // stuck at last entry till EOF
3248 && nc + 1 == // Chunks counted starting with '1'
3249 tr->sample_to_chunk[chunk_group + 1].first_chunk) // next group?
3250 {
3251 chunk_group++;
3252 }
3253
3254 sum += tr->sample_to_chunk[chunk_group].samples_per_chunk;
3255 if (nsample < sum)
3256 return nc;
3257
3258 // TODO: this can be calculated once per file
3259 *nfirst_sample_in_chunk = sum;
3260 }
3261 return -1;
3262}
3263
3264// Exported API function
3265MP4D_file_offset_t MP4D_frame_offset(const MP4D_demux_t* mp4, unsigned ntrack, unsigned nsample, unsigned* frame_bytes, unsigned* timestamp, unsigned* duration)
3266{
3267 MP4D_track_t* tr = mp4->track + ntrack;
3268 unsigned ns;
3269 int nchunk = sample_to_chunk(tr, nsample, &ns);
3270 MP4D_file_offset_t offset;
3271
3272 if (nchunk < 0)
3273 {
3274 *frame_bytes = 0;
3275 return 0;
3276 }
3277
3278 offset = tr->chunk_offset[nchunk];
3279 for (; ns < nsample; ns++)
3280 {
3281 offset += tr->entry_size[ns];
3282 }
3283
3284 *frame_bytes = tr->entry_size[ns];
3285
3286 if (timestamp)
3287 {
3288#if MP4D_TIMESTAMPS_SUPPORTED
3289 * timestamp = tr->timestamp[ns];
3290#else
3291 * timestamp = 0;
3292#endif
3293 }
3294 if (duration)
3295 {
3296#if MP4D_TIMESTAMPS_SUPPORTED
3297 * duration = tr->duration[ns];
3298#else
3299 * duration = 0;
3300#endif
3301 }
3302
3303 return offset;
3304}
3305
3306#define FREE(x) if (x) {free(x); x = NULL;}
3307
3308// Exported API function
3309void MP4D_close(MP4D_demux_t* mp4)
3310{
3311 while (mp4->track_count)
3312 {
3313 MP4D_track_t* tr = mp4->track + --mp4->track_count;
3314 FREE(tr->entry_size);
3315#if MP4D_TIMESTAMPS_SUPPORTED
3316 FREE(tr->timestamp);
3317 FREE(tr->duration);
3318#endif
3319 FREE(tr->sample_to_chunk);
3320 FREE(tr->chunk_offset);
3321 FREE(tr->dsi);
3322 }
3323 FREE(mp4->track);
3324#if MP4D_INFO_SUPPORTED
3325 FREE(mp4->tag.title);
3326 FREE(mp4->tag.artist);
3327 FREE(mp4->tag.album);
3328 FREE(mp4->tag.year);
3329 FREE(mp4->tag.comment);
3330 FREE(mp4->tag.genre);
3331#endif
3332}
3333
/*
 * Step over `nskip` length-prefixed segments.
 *
 * Each segment is a 2-byte big-endian length followed by that many bytes.
 *
 * p       start of the first segment
 * nbytes  number of valid bytes at p
 * nskip   number of segments to step over
 *
 * Returns the offset of the byte following the last skipped segment (0 when
 * nskip is 0), or -1 when a length prefix or a segment body would extend past
 * `nbytes`. Previously the body of the last segment was not checked, so an
 * offset beyond the buffer could be returned.
 */
static int skip_spspps(const unsigned char* p, int nbytes, int nskip)
{
    int i, k = 0;
    for (i = 0; i < nskip; i++)
    {
        unsigned segmbytes;
        if (k > nbytes - 2)
            return -1;
        segmbytes = p[k] * 256 + p[k + 1];
        // nbytes - k - 2 is non-negative here because of the check above.
        if (segmbytes > (unsigned)(nbytes - k - 2))
            return -1;
        k += 2 + segmbytes;
    }
    return k;
}
3347
/*
 * Common worker for MP4D_read_sps() / MP4D_read_pps(): locate one parameter
 * set inside the track's `dsi` buffer.
 *
 * The buffer is read as [count][segment]*, where each segment is a 2-byte
 * big-endian length followed by the data (see skip_spspps). With pps_flag set,
 * the first list is skipped completely so that the second list is addressed.
 *
 * mp4        opened demuxer
 * ntrack     track index
 * pps_flag   0: first list, non-zero: second list
 * nsps       zero-based index of the wanted entry in the selected list
 * sps_bytes  out: length of the entry
 *
 * Returns a pointer into the track's dsi buffer, or NULL when ntrack is out of
 * range, the entry index is not below the list's count, or skip_spspps()
 * reports an error.
 *
 * NOTE(review): the embedded listing numbers jump 3354 -> 3356, so the
 * condition guarding the second `return NULL;` is absent from this listing; as
 * shown, that return would be unconditional - confirm against the original
 * header.
 * NOTE(review): `p` is loaded from mp4->track[ntrack] before ntrack is compared
 * with track_count, and the final length read is not checked against
 * dsi_bytes.
 */
3348static const void* MP4D_read_spspps(const MP4D_demux_t* mp4, unsigned int ntrack, int pps_flag, int nsps, int* sps_bytes)
3349{
3350 int sps_count, skip_bytes;
3351 int bytepos = 0;
3352 unsigned char* p = mp4->track[ntrack].dsi;
3353 if (ntrack >= mp4->track_count)
3354 return NULL;
3356 return NULL; // SPS/PPS are specific for AVC format only
3357
3358 if (pps_flag)
3359 {
3360 // Skip all SPS
3361 sps_count = p[bytepos++];
3362 skip_bytes = skip_spspps(p + bytepos, mp4->track[ntrack].dsi_bytes - bytepos, sps_count);
3363 if (skip_bytes < 0)
3364 return NULL;
3365 bytepos += skip_bytes;
3366 }
3367
3368 // Skip sps/pps before the given target
3369 sps_count = p[bytepos++];
3370 if (nsps >= sps_count)
3371 return NULL;
3372 skip_bytes = skip_spspps(p + bytepos, mp4->track[ntrack].dsi_bytes - bytepos, nsps);
3373 if (skip_bytes < 0)
3374 return NULL;
3375 bytepos += skip_bytes;
// Entry found: report its 2-byte big-endian length and return the data behind it.
3376 *sps_bytes = p[bytepos] * 256 + p[bytepos + 1];
3377 return p + bytepos + 2;
3378}
3379
3380
3381const void* MP4D_read_sps(const MP4D_demux_t* mp4, unsigned int ntrack, int nsps, int* sps_bytes)
3382{
3383 return MP4D_read_spspps(mp4, ntrack, 0, nsps, sps_bytes);
3384}
3385
3386const void* MP4D_read_pps(const MP4D_demux_t* mp4, unsigned int ntrack, int npps, int* pps_bytes)
3387{
3388 return MP4D_read_spspps(mp4, ntrack, 1, npps, pps_bytes);
3389}
3390
3391#if MP4D_PRINT_INFO_SUPPORTED
3392/************************************************************************/
3393/* Purely informational part, may be removed for embedded applications */
3394/************************************************************************/
3395
//
// Maps an ISO/IEC 14496 MP4 stream type code to a constant ASCII description.
// Codes 0x00..0x09 have individual names, 0x20..0x3F are reported as
// "User private", every other value as "Reserved for ISO use".
//
static const char* GetMP4StreamTypeName(int streamType)
{
    // Names of the individually listed codes, indexed by the code itself
    static const char* const listed[] =
    {
        "Forbidden",                // 0x00
        "ObjectDescriptorStream",   // 0x01
        "ClockReferenceStream",     // 0x02
        "SceneDescriptionStream",   // 0x03
        "VisualStream",             // 0x04
        "AudioStream",              // 0x05
        "MPEG7Stream",              // 0x06
        "IPMPStream",               // 0x07
        "ObjectContentInfoStream",  // 0x08
        "MPEGJStream"               // 0x09
    };

    if (streamType >= 0x00 && streamType <= 0x09)
        return listed[streamType];

    if (streamType < 0x20 || streamType > 0x3F)
        return "Reserved for ISO use";

    return "User private";
}
3424
//
// Maps an ISO/IEC 14496 MP4 object type indication to a constant ASCII
// description. Codes present in the table below have individual names,
// 0xC0..0xFE are reported as "User private", every other value as
// "Reserved for ISO use".
//
static const char* GetMP4ObjectTypeName(int objectTypeIndication)
{
    static const struct
    {
        int code;
        const char* name;
    } listed[] =
    {
        { 0x00, "Forbidden" },
        { 0x01, "Systems ISO/IEC 14496-1" },
        { 0x02, "Systems ISO/IEC 14496-1" },
        { 0x20, "Visual ISO/IEC 14496-2" },
        { 0x40, "Audio ISO/IEC 14496-3" },
        { 0x60, "Visual ISO/IEC 13818-2 Simple Profile" },
        { 0x61, "Visual ISO/IEC 13818-2 Main Profile" },
        { 0x62, "Visual ISO/IEC 13818-2 SNR Profile" },
        { 0x63, "Visual ISO/IEC 13818-2 Spatial Profile" },
        { 0x64, "Visual ISO/IEC 13818-2 High Profile" },
        { 0x65, "Visual ISO/IEC 13818-2 422 Profile" },
        { 0x66, "Audio ISO/IEC 13818-7 Main Profile" },
        { 0x67, "Audio ISO/IEC 13818-7 LC Profile" },
        { 0x68, "Audio ISO/IEC 13818-7 SSR Profile" },
        { 0x69, "Audio ISO/IEC 13818-3" },
        { 0x6A, "Visual ISO/IEC 11172-2" },
        { 0x6B, "Audio ISO/IEC 11172-3" },
        { 0x6C, "Visual ISO/IEC 10918-1" },
        { 0xFF, "no object type specified" }
    };
    unsigned idx;

    // Exact matches take priority over the range fallbacks
    for (idx = 0; idx < sizeof(listed) / sizeof(listed[0]); idx++)
    {
        if (listed[idx].code == objectTypeIndication)
            return listed[idx].name;
    }

    return (objectTypeIndication >= 0xC0 && objectTypeIndication <= 0xFE)
        ? "User private"
        : "Reserved for ISO use";
}
3458
// Prints a summary of the demuxed file to stdout: a header line with the track
// count and movie time, any metadata tags that are set, then one table row per
// track (index, handler type, language, duration, sample count, average
// bitrate, stream type name and object type name).
//
//  mp4 - demuxer state to report on; it is only read
3488void MP4D_printf_info(const MP4D_demux_t* mp4)
3489{
3490 unsigned i;
// Movie time in seconds: the 64-bit duration (duration_hi:duration_lo) divided by timescale.
// NOTE(review): track_count is unsigned but printed with %d — confirm this is intended.
3491 printf("\nMP4 FILE: %d tracks found. Movie time %.2f sec\n", mp4->track_count, (4294967296.0 * mp4->duration_hi + mp4->duration_lo) / mp4->timescale);
// STR_TAG(name) prints "name = value" only when mp4->tag.name is non-NULL.
// The macro is not #undef'ed after use.
3492#define STR_TAG(name) if (mp4->tag.name) printf("%10s = %s\n", #name, mp4->tag.name)
3493 STR_TAG(title);
3494 STR_TAG(artist);
3495 STR_TAG(album);
3496 STR_TAG(year);
3497 STR_TAG(comment);
3498 STR_TAG(genre);
// Table header
3499 printf("\nNo|type|lng| duration | bitrate| %-23s| Object type", "Stream type");
3500 for (i = 0; i < mp4->track_count; i++)
3501 {
3502 MP4D_track_t* tr = mp4->track + i;
3503
// handler_type is printed as four characters, most significant byte first;
// the track duration is converted to seconds the same way as the movie time above.
3504 printf("\n%2d|%c%c%c%c|%c%c%c|%7.2f s %6d frm| %7d|", i,
3505 (tr->handler_type >> 24), (tr->handler_type >> 16), (tr->handler_type >> 8), (tr->handler_type >> 0),
3506 tr->language[0], tr->language[1], tr->language[2],
3507 (65536.0 * 65536.0 * tr->duration_hi + tr->duration_lo) / tr->timescale,
3508 tr->sample_count,
3509 tr->avg_bitrate_bps);
3510
3511 printf(" %-23s|", GetMP4StreamTypeName(tr->stream_type));
3512 printf(" %-23s", GetMP4ObjectTypeName(tr->object_type_indication));
3513
// NOTE(review): the condition of the first branch and the bodies of both
// branches are absent from this listing (embedded line numbers 3514, 3516 and
// 3520 are skipped) — restore them from the original header; presumably they
// print per-handler details for sound and video tracks, TODO confirm.
3515 {
3517 }
3518 else if (tr->handler_type == MP4D_HANDLER_TYPE_VIDE)
3519 {
3521 }
3522 }
3523 printf("\n");
3524}
3525
3526#endif // MP4D_PRINT_INFO_SUPPORTED
3527#endif
const void * MP4D_read_sps(const MP4D_demux_t *mp4, unsigned int ntrack, int nsps, int *sps_bytes)
boxsize_t MP4D_file_offset_t
Definition minimp4.h:102
struct MP4E_mux_tag MP4E_mux_t
Definition minimp4.h:126
#define MP4E_SAMPLE_DEFAULT
Definition minimp4.h:89
#define MP4_OBJECT_TYPE_AVC
Definition minimp4.h:71
#define MP4E_STATUS_NO_MEMORY
Definition minimp4.h:82
int mp4_h26x_write_init(mp4_h26x_writer_t *h, MP4E_mux_t *mux, int width, int height, int is_hevc)
#define MINIMP4_MAX_SPS
Definition minimp4.h:32
void MP4D_close(MP4D_demux_t *mp4)
int MP4E_set_text_comment(MP4E_mux_t *mux, const char *comment)
MP4E_mux_t * MP4E_open(int sequential_mode_flag, int enable_fragmentation, void *token, int(*write_callback)(int64_t offset, const void *buffer, size_t size, void *token))
int MP4D_open(MP4D_demux_t *mp4, int(*read_callback)(int64_t offset, void *buffer, size_t size, void *token), void *token, int64_t file_size)
void mp4_h26x_write_close(mp4_h26x_writer_t *h)
struct MP4D_demux_tag MP4D_demux_t
#define MP4D_TFDT_SUPPORT
Definition minimp4.h:57
#define HEVC_NAL_CRA_NUT
Definition minimp4.h:120
int mp4_h26x_write_nal(mp4_h26x_writer_t *h, const unsigned char *nal, int length, unsigned timeStamp90kHz_next)
#define MP4E_HANDLER_TYPE_GESM
Definition minimp4.h:113
int MP4E_put_sample(MP4E_mux_t *mux, int track_num, const void *data, int data_bytes, int duration, int kind)
uint64_t boxsize_t
Definition minimp4.h:98
#define MP4E_STATUS_BAD_ARGUMENTS
Definition minimp4.h:81
#define MP4D_HANDLER_TYPE_SOUN
Definition minimp4.h:110
int MP4E_close(MP4E_mux_t *mux)
int MP4E_set_dsi(MP4E_mux_t *mux, int track_id, const void *dsi, int bytes)
int MP4E_set_sps(MP4E_mux_t *mux, int track_id, const void *sps, int bytes)
#define HEVC_NAL_VPS
Definition minimp4.h:116
#define MP4E_SAMPLE_RANDOM_ACCESS
Definition minimp4.h:90
#define MINIMP4_MAX_PPS
Definition minimp4.h:33
#define MP4E_STATUS_OK
Definition minimp4.h:80
#define MINIMP4_MIN(x, y)
Definition minimp4.h:22
int MP4E_add_track(MP4E_mux_t *mux, const MP4E_track_t *track_data)
MP4D_file_offset_t MP4D_frame_offset(const MP4D_demux_t *mp4, unsigned int ntrack, unsigned int nsample, unsigned int *frame_bytes, unsigned *timestamp, unsigned *duration)
track_media_kind_t
Definition minimp4.h:129
@ e_private
Definition minimp4.h:132
@ e_video
Definition minimp4.h:131
@ e_audio
Definition minimp4.h:130
struct mp4_h26x_writer_tag mp4_h26x_writer_t
int MP4E_set_vps(MP4E_mux_t *mux, int track_id, const void *vps, int bytes)
#define MAX_CHUNKS_DEPTH
Definition minimp4.h:30
const void * MP4D_read_pps(const MP4D_demux_t *mp4, unsigned int ntrack, int npps, int *pps_bytes)
#define HEVC_NAL_SPS
Definition minimp4.h:117
#define MP4E_STATUS_ONLY_ONE_DSI_ALLOWED
Definition minimp4.h:84
#define MP4_OBJECT_TYPE_AUDIO_ISO_IEC_14496_3
Definition minimp4.h:63
#define MP4_OBJECT_TYPE_HEVC
Definition minimp4.h:73
int MP4E_set_pps(MP4E_mux_t *mux, int track_id, const void *pps, int bytes)
#define HEVC_NAL_BLA_W_LP
Definition minimp4.h:119
#define MP4D_HANDLER_TYPE_VIDE
Definition minimp4.h:108
#define MP4E_SAMPLE_CONTINUATION
Definition minimp4.h:91
#define HEVC_NAL_PPS
Definition minimp4.h:118
AU_EXTERN AU_EXPORT void printf(const char *format,...)
Definition stdio.cpp:121
AU_EXPORT char * strdup(const char *c)
Definition string.cpp:237
#define RETURN_ERROR(StatusCode)
Definition Base.h:1000
XE_LIB void * memchr(const void *src, int c, size_t n)
Definition string.cpp:652
XETime t
Definition main.cpp:53
void * memcpy(void *Dest, const void *Src, ACPI_SIZE Count)
Definition utclib.c:310
int memcmp(void *Buffer1, void *Buffer2, ACPI_SIZE Count)
Definition utclib.c:222
ACPI_SIZE strlen(const char *String)
Definition utclib.c:379
void * memset(void *Dest, int Value, ACPI_SIZE Count)
Definition utclib.c:346
unsigned int uint32_t
Definition acefiex.h:163
COMPILER_DEPENDENT_INT64 int64_t
Definition acefiex.h:164
unsigned char uint8_t
Definition acefiex.h:161
COMPILER_DEPENDENT_UINT64 uint64_t
Definition acefiex.h:165
unsigned short int uint16_t
Definition acefiex.h:162
#define NULL
Definition actypes.h:561
#define assert(expression)
Definition assert.h:40
char end[]
boxsize_t MP4D_file_offset_t
Definition minimp4.h:102
struct MP4E_mux_tag MP4E_mux_t
Definition minimp4.h:126
track_media_kind_t
Definition minimp4.h:129
XE_LIB void * realloc(void *address, unsigned int new_size)
Definition _heap.cpp:502
XE_LIB void * malloc(unsigned int)
Definition _heap.cpp:281
XE_LIB void free(void *ptr)
Definition _heap.cpp:392
Definition minimp4.h:279
unsigned duration_hi
Definition minimp4.h:296
unsigned char * album
Definition minimp4.h:308
unsigned timescale
Definition minimp4.h:300
int64_t read_size
Definition minimp4.h:284
void * token
Definition minimp4.h:287
unsigned char * title
Definition minimp4.h:306
MP4D_track_t * track
Definition minimp4.h:285
int64_t read_pos
Definition minimp4.h:283
unsigned char * year
Definition minimp4.h:309
unsigned duration_lo
Definition minimp4.h:297
unsigned char * artist
Definition minimp4.h:307
unsigned char * comment
Definition minimp4.h:310
unsigned char * genre
Definition minimp4.h:311
unsigned track_count
Definition minimp4.h:289
struct MP4D_demux_tag::@481 tag
int(* read_callback)(int64_t offset, void *buffer, size_t size, void *token)
Definition minimp4.h:286
Definition minimp4.h:318
unsigned first_chunk
Definition minimp4.h:319
unsigned samples_per_chunk
Definition minimp4.h:320
Definition minimp4.h:170
unsigned sample_count
Definition minimp4.h:176
MP4D_file_offset_t * chunk_offset
Definition minimp4.h:269
unsigned channelcount
Definition minimp4.h:247
unsigned sample_to_chunk_count
Definition minimp4.h:265
unsigned chunk_count
Definition minimp4.h:268
unsigned width
Definition minimp4.h:254
unsigned height
Definition minimp4.h:255
unsigned * timestamp
Definition minimp4.h:272
union MP4D_track_t::@478 SampleDescription
unsigned samplerate_hz
Definition minimp4.h:248
struct MP4D_track_t::@478::@480 video
unsigned object_type_indication
Definition minimp4.h:203
unsigned timescale
Definition minimp4.h:221
struct MP4D_sample_to_chunk_t_tag * sample_to_chunk
Definition minimp4.h:266
unsigned * entry_size
Definition minimp4.h:263
unsigned char * dsi
Definition minimp4.h:179
unsigned duration_lo
Definition minimp4.h:218
unsigned handler_type
Definition minimp4.h:214
unsigned * duration
Definition minimp4.h:273
unsigned dsi_bytes
Definition minimp4.h:182
unsigned char language[4]
Definition minimp4.h:227
unsigned duration_hi
Definition minimp4.h:217
unsigned stream_type
Definition minimp4.h:240
struct MP4D_track_t::@478::@479 audio
unsigned avg_bitrate_bps
Definition minimp4.h:224
Definition minimp4.h:136
int width
Definition minimp4.h:160
track_media_kind_t track_media_kind
Definition minimp4.h:144
unsigned time_scale
Definition minimp4.h:147
unsigned char language[4]
Definition minimp4.h:142
unsigned channelcount
Definition minimp4.h:155
struct MP4E_track_t::@475::@477 v
int height
Definition minimp4.h:161
unsigned object_type_indication
Definition minimp4.h:139
unsigned default_duration
Definition minimp4.h:148
union MP4E_track_t::@475 u
Definition minimp4.h:324
int map_pps[MINIMP4_MAX_PPS]
Definition minimp4.h:331
void * sps_cache[MINIMP4_MAX_SPS]
Definition minimp4.h:325
int sps_bytes[MINIMP4_MAX_SPS]
Definition minimp4.h:327
void * pps_cache[MINIMP4_MAX_PPS]
Definition minimp4.h:326
int map_sps[MINIMP4_MAX_SPS]
Definition minimp4.h:330
int pps_bytes[MINIMP4_MAX_PPS]
Definition minimp4.h:328
Definition minimp4.h:336
int need_sps
Definition minimp4.h:341
MP4E_mux_t * mux
Definition minimp4.h:340
int need_pps
Definition minimp4.h:341
int need_vps
Definition minimp4.h:341
int is_hevc
Definition minimp4.h:341
int need_idr
Definition minimp4.h:341
h264_sps_id_patcher_t sps_patcher
Definition minimp4.h:338
int mux_track_id
Definition minimp4.h:341
size_t(* write_callback)(struct __VFS_NODE__ *node, struct __VFS_NODE__ *file, uint64_t *buffer, uint32_t length)
Definition vfs.h:77
size_t(* read_callback)(struct __VFS_NODE__ *node, struct __VFS_NODE__ *file, uint64_t *buffer, uint32_t length)
Definition vfs.h:75