/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include "mp4.h"

// Convert a duration expressed in `timescale` units per second to
// milliseconds.  A zero timescale yields 0 instead of dividing by zero and
// converting inf/NaN to an unsigned value.
static UV
_mp4_duration_ms(uint64_t duration, uint32_t timescale)
{
  if (!timescale) {
    return 0;
  }

  return (UV)( (duration * 1.0 / timescale) * 1000 );
}

// Wrap the bytes currently held in `payload` into a complete box:
// 32-bit size (payload length + 12), the 4-byte fourcc, 4 zero bytes
// (version/flags) and then the payload.  Returns a new SV; the caller owns
// the reference.  `payload` is not modified.
static SV *
_mp4_wrap_seek_box(const char *fourcc, Buffer *payload)
{
  char size_bytes[4];
  SV *box = newSVpv("", 0);

  put_u32( size_bytes, buffer_len(payload) + 12 );
  sv_catpvn( box, size_bytes, 4 );
  sv_catpvn( box, fourcc, 4 );
  sv_catpvn( box, "\0\0\0\0", 4 );
  sv_catpvn( box, (char *)buffer_ptr(payload), buffer_len(payload) );

  return box;
}

// Parse an MP4 file without seek tables, filling `info` and `tags`.
// The mp4info state returned by _mp4_parse is released here.
// Always returns 0.
static int
get_mp4tags(PerlIO *infile, char *file, HV *info, HV *tags)
{
  mp4info *mp4 = _mp4_parse(infile, file, info, tags, 0);

  Safefree(mp4);

  return 0;
}

// wrapper to return just the file offset
// Returns the "seek_offset" value computed by mp4_find_frame_return_info,
// or -1 if that key was not stored.  `offset` is in ms.
int
mp4_find_frame(PerlIO *infile, char *file, int offset)
{
  HV *info = newHV();
  int frame_offset = -1;

  mp4_find_frame_return_info(infile, file, offset, info);

  if ( my_hv_exists(info, "seek_offset") ) {
    frame_offset = SvIV( *(my_hv_fetch(info, "seek_offset") ) );
  }

  SvREFCNT_dec(info);

  return frame_offset;
}

// offset is in ms
// This is based on code from Rockbox
//
// Locates the sample at `offset` ms, then builds replacement stts/stsc/stsz/
// stco boxes describing only the data from that sample onwards and re-reads
// the file to assemble a rewritten header.
//
// On success returns 1 and stores in `info`:
//   seek_offset - file offset of the target sample
//   seek_header - the rewritten header bytes
// On failure returns -1 and stores seek_offset = -1.
int
mp4_find_frame_return_info(PerlIO *infile, char *file, int offset, HV *info)
{
  int ret = 1;
  // 32 bits: a 16-bit value silently truncates rates above 65535 Hz
  // (e.g. 88200 / 96000) and sends the seek to the wrong sample
  uint32_t samplerate = 0;
  uint32_t sound_sample_loc;
  uint32_t i = 0;
  uint32_t j = 0;
  uint32_t new_sample = 0;
  uint32_t new_sound_sample = 0;

  uint32_t chunk = 1;
  uint32_t range_samples = 0;
  uint32_t total_samples = 0;
  uint32_t skipped_samples = 0;
  uint32_t chunk_sample;
  uint32_t prev_chunk;
  uint32_t prev_chunk_samples;
  uint32_t file_offset;
  uint32_t chunk_offset;

  uint32_t box_size = 0;

  Buffer tmp_buf;
  SV **audio_offset_sv;

  // We need to read all info first to get some data we need to calculate
  HV *tags = newHV();
  mp4info *mp4 = _mp4_parse(infile, file, info, tags, 1);

  // Init seek buffer (must happen before the first goto out, which frees it)
  buffer_init(&tmp_buf, MP4_BLOCK_SIZE);

  // Seeking not yet supported for files with multiple tracks
  if (mp4->track_count > 1) {
    ret = -1;
    goto out;
  }

  if ( !my_hv_exists(info, "samplerate") ) {
    PerlIO_printf(PerlIO_stderr(), "find_frame: unknown sample rate\n");
    ret = -1;
    goto out;
  }

  // Pull out the samplerate
  samplerate = SvIV( *( my_hv_fetch( info, "samplerate" ) ) );

  // convert offset to sound_sample_loc
  sound_sample_loc = (offset / 10) * (samplerate / 100);

  DEBUG_TRACE("Looking for target sample %u\n", sound_sample_loc);

  // Make sure we have the necessary metadata
  if (
       !mp4->num_time_to_samples
    || !mp4->num_sample_byte_sizes
    || !mp4->num_sample_to_chunks
    || !mp4->num_chunk_offsets
  ) {
    PerlIO_printf(PerlIO_stderr(), "find_frame: File does not contain seek metadata: %s\n", file);
    ret = -1;
    goto out;
  }

  // Find the destination block from time_to_sample array
  while ( (i < mp4->num_time_to_samples) && (new_sound_sample < sound_sample_loc) ) {
    if ( !mp4->time_to_sample[i].sample_duration ) {
      // Zero-length samples consume no time; step over the whole entry
      // instead of dividing by zero below
      new_sample += mp4->time_to_sample[i].sample_count;
      i++;
      continue;
    }

    j = (sound_sample_loc - new_sound_sample) / mp4->time_to_sample[i].sample_duration;

    DEBUG_TRACE(
      "i = %d / j = %d, sample_count[i]: %d, sample_duration[i]: %d\n",
      i, j,
      mp4->time_to_sample[i].sample_count,
      mp4->time_to_sample[i].sample_duration
    );

    if (j <= mp4->time_to_sample[i].sample_count) {
      new_sample += j;
      new_sound_sample += j * mp4->time_to_sample[i].sample_duration;
      break;
    }
    else {
      // XXX need test for this bit of code (variable stts)
      new_sound_sample += (mp4->time_to_sample[i].sample_duration * mp4->time_to_sample[i].sample_count);
      new_sample += mp4->time_to_sample[i].sample_count;
      i++;
    }
  }

  if ( new_sample >= mp4->num_sample_byte_sizes ) {
    PerlIO_printf(PerlIO_stderr(), "find_frame: Offset out of range (%d >= %d)\n", new_sample, mp4->num_sample_byte_sizes);
    ret = -1;
    goto out;
  }

  DEBUG_TRACE("new_sample: %d, new_sound_sample: %d\n", new_sample, new_sound_sample);

  // Write new stts box
  {
    uint32_t n;
    uint32_t total_sample_count = _mp4_total_samples(mp4);
    uint32_t stts_entries;
    uint32_t cur_duration = 0;
    struct tts *stts;
    int32_t stts_index = -1;

    // Without this check the subtraction below wraps to ~4 billion entries
    if (total_sample_count < new_sample) {
      PerlIO_printf(PerlIO_stderr(), "find_frame: target sample %u is beyond the %u samples described by stts\n", new_sample, total_sample_count);
      ret = -1;
      goto out;
    }

    stts_entries = total_sample_count - new_sample;

    // Newz takes an item count, not a byte count
    Newz(0, stts, stts_entries, struct tts);

    for (n = new_sample; n < total_sample_count; n++) {
      uint32_t duration = _mp4_get_sample_duration(mp4, n);

      if (cur_duration && cur_duration == duration) {
        // same as previous entry, combine together
        stts_entries--;
        stts[stts_index].sample_count++;
      }
      else {
        stts_index++;
        stts[stts_index].sample_count = 1;
        stts[stts_index].sample_duration = duration;
        cur_duration = duration;
      }
    }

    DEBUG_TRACE("Writing new stts (entries: %d)\n", stts_entries);

    buffer_put_int(&tmp_buf, stts_entries);

    for (n = 0; n < stts_entries; n++) {
      DEBUG_TRACE(" sample_count %d, sample_duration %d\n", stts[n].sample_count, stts[n].sample_duration);
      buffer_put_int(&tmp_buf, stts[n].sample_count);
      buffer_put_int(&tmp_buf, stts[n].sample_duration);
    }

    mp4->new_stts = _mp4_wrap_seek_box("stts", &tmp_buf);

    buffer_clear(&tmp_buf);

    Safefree(stts);
  }

  // We know the new block, now calculate the file position

  /* Locate the chunk containing the sample */
  prev_chunk = mp4->sample_to_chunk[0].first_chunk;
  prev_chunk_samples = mp4->sample_to_chunk[0].samples_per_chunk;

  for (i = 1; i < mp4->num_sample_to_chunks; i++) {
    chunk = mp4->sample_to_chunk[i].first_chunk;
    range_samples = (chunk - prev_chunk) * prev_chunk_samples;

    DEBUG_TRACE("prev_chunk: %d, prev_chunk_samples: %d, chunk: %d, range_samples: %d\n",
      prev_chunk, prev_chunk_samples, chunk, range_samples);

    if (new_sample < total_samples + range_samples)
      break;

    total_samples += range_samples;
    prev_chunk = mp4->sample_to_chunk[i].first_chunk;
    prev_chunk_samples = mp4->sample_to_chunk[i].samples_per_chunk;
  }

  DEBUG_TRACE("prev_chunk: %d, prev_chunk_samples: %d, total_samples: %d\n", prev_chunk, prev_chunk_samples, total_samples);

  if (new_sample >= mp4->sample_to_chunk[0].samples_per_chunk) {
    if (!prev_chunk_samples) {
      // would divide by zero below
      PerlIO_printf(PerlIO_stderr(), "find_frame: invalid stsc entry (zero samples per chunk): %s\n", file);
      ret = -1;
      goto out;
    }

    chunk = prev_chunk + (new_sample - total_samples) / prev_chunk_samples;
  }
  else {
    chunk = 1;
  }

  DEBUG_TRACE("chunk: %d\n", chunk);

  // chunk is used as a 1-based index into chunk_offset[] below
  if (!chunk) {
    PerlIO_printf(PerlIO_stderr(), "find_frame: invalid stsc entry (chunk numbers start at 1): %s\n", file);
    ret = -1;
    goto out;
  }

  /* Get sample of the first sample in the chunk */
  chunk_sample = total_samples + (chunk - prev_chunk) * prev_chunk_samples;

  DEBUG_TRACE("chunk_sample: %d\n", chunk_sample);

  /* Get offset in file */
  if (chunk > mp4->num_chunk_offsets) {
    file_offset = mp4->chunk_offset[mp4->num_chunk_offsets - 1];
  }
  else {
    file_offset = mp4->chunk_offset[chunk - 1];
  }

  DEBUG_TRACE("file_offset: %d\n", file_offset);

  if (chunk_sample > new_sample) {
    PerlIO_printf(PerlIO_stderr(), "find_frame: sample out of range (%d > %d)\n", chunk_sample, new_sample);
    ret = -1;
    goto out;
  }

  // Move offset within the chunk to the correct sample range
  for (i = chunk_sample; i < new_sample; i++) {
    file_offset += mp4->sample_byte_size[i];
    skipped_samples++;
    DEBUG_TRACE(" file_offset + %d: %d\n", mp4->sample_byte_size[i], file_offset);
  }

  if (file_offset > mp4->audio_offset + mp4->audio_size) {
    PerlIO_printf(PerlIO_stderr(), "find_frame: file offset out of range (%d > %lld)\n", file_offset, mp4->audio_offset + mp4->audio_size);
    ret = -1;
    goto out;
  }

  // Write new stsc box
  {
    uint32_t n;
    uint32_t stsc_entries = mp4->num_chunk_offsets - chunk + 1;
    uint32_t cur_samples_per_chunk = 0;
    struct stc *stsc;
    int32_t stsc_index = -1;
    uint32_t chunk_delta = 1;

    j = 1; // 1 until the first (possibly partial) chunk has been written

    // Newz takes an item count, not a byte count
    Newz(0, stsc, stsc_entries, struct stc);

    for (n = chunk; n <= mp4->num_chunk_offsets; n++) {
      // Find the number of samples in chunk n
      uint32_t samples_in_chunk = _mp4_samples_in_chunk(mp4, n);

      if (cur_samples_per_chunk && cur_samples_per_chunk == samples_in_chunk) {
        // same as previous entry, combine together
        stsc_entries--;
      }
      else {
        stsc_index++;
        stsc[stsc_index].first_chunk = chunk_delta;

        if (j == 1) {
          // The first chunk may have less samples in it due to seeking within a chunk
          stsc[stsc_index].samples_per_chunk = samples_in_chunk - skipped_samples;
          cur_samples_per_chunk = samples_in_chunk - skipped_samples;
          j++;
        }
        else {
          stsc[stsc_index].samples_per_chunk = samples_in_chunk;
          cur_samples_per_chunk = samples_in_chunk;
        }
      }

      chunk_delta++;
    }

    DEBUG_TRACE("Writing new stsc (entries: %d)\n", stsc_entries);

    buffer_put_int(&tmp_buf, stsc_entries);

    for (n = 0; n < stsc_entries; n++) {
      DEBUG_TRACE(" first_chunk %d, samples_per_chunk %d\n", stsc[n].first_chunk, stsc[n].samples_per_chunk);
      buffer_put_int(&tmp_buf, stsc[n].first_chunk);
      buffer_put_int(&tmp_buf, stsc[n].samples_per_chunk);
      buffer_put_int(&tmp_buf, 1); // XXX sample description index, is this OK?
    }

    mp4->new_stsc = _mp4_wrap_seek_box("stsc", &tmp_buf);

    DEBUG_TRACE("Created new stsc\n");

    buffer_clear(&tmp_buf);

    Safefree(stsc);
  }

  // Write new stsz box, num_sample_byte_sizes -= $new_sample, skip $new_sample items
  buffer_put_int(&tmp_buf, 0);
  buffer_put_int(&tmp_buf, mp4->num_sample_byte_sizes - new_sample);

  DEBUG_TRACE("Writing new stsz: %d items\n", mp4->num_sample_byte_sizes - new_sample);

  j = 1;
  for (i = new_sample; i < mp4->num_sample_byte_sizes; i++) {
    DEBUG_TRACE(" sample %d sample_byte_size %d\n", j++, mp4->sample_byte_size[i]);
    buffer_put_int(&tmp_buf, mp4->sample_byte_size[i]);
  }

  mp4->new_stsz = _mp4_wrap_seek_box("stsz", &tmp_buf);

  DEBUG_TRACE("Created new stsz\n");

  buffer_clear(&tmp_buf);

  // Total up size of 4 new st* boxes
  // stco is calculated directly since we can't write it without offsets
  mp4->new_st_size
    = sv_len(mp4->new_stts)
    + sv_len(mp4->new_stsc)
    + sv_len(mp4->new_stsz)
    + 12 + ( 4 * (mp4->num_chunk_offsets - chunk + 2) ); // stco size

  DEBUG_TRACE("new_st_size: %d, old_st_size: %d\n", mp4->new_st_size, mp4->old_st_size);

  // Calculate offset for each chunk
  // audio_offset is only stored once an mdat box has been read
  audio_offset_sv = my_hv_fetch(info, "audio_offset");
  if (!audio_offset_sv) {
    PerlIO_printf(PerlIO_stderr(), "find_frame: File does not contain audio data (no mdat box): %s\n", file);
    ret = -1;
    goto out;
  }

  chunk_offset = SvIV( *audio_offset_sv );
  chunk_offset -= ( mp4->old_st_size - mp4->new_st_size );
  chunk_offset += 8; // mdat size + fourcc

  DEBUG_TRACE("chunk_offset: %d\n", chunk_offset);

  // Write new stco box, num_chunk_offsets -= $chunk, skip $chunk items
  buffer_put_int(&tmp_buf, mp4->num_chunk_offsets - chunk + 1);

  DEBUG_TRACE("Writing new stco: %d items\n", mp4->num_chunk_offsets - chunk + 1);

  for (i = chunk - 1; i < mp4->num_chunk_offsets; i++) {
    if (i == chunk - 1) {
      // The first chunk offset is the start of mdat (chunk_offset)
      buffer_put_int( &tmp_buf, chunk_offset );
      DEBUG_TRACE( " offset %d (orig %d)\n", chunk_offset, mp4->chunk_offset[i] );
    }
    else {
      buffer_put_int( &tmp_buf, mp4->chunk_offset[i] - file_offset + chunk_offset );
      DEBUG_TRACE( " offset %d (orig %d)\n", mp4->chunk_offset[i] - file_offset + chunk_offset, mp4->chunk_offset[i] );
    }
  }

  mp4->new_stco = _mp4_wrap_seek_box("stco", &tmp_buf);

  DEBUG_TRACE("Created new stco\n");

  buffer_clear(&tmp_buf);

  DEBUG_TRACE("real st size: %ld\n",
    sv_len(mp4->new_stts)
    + sv_len(mp4->new_stsc)
    + sv_len(mp4->new_stsz)
    + sv_len(mp4->new_stco)
  );

  // Make second pass through header, reducing size of all parent boxes by st* size difference
  // Copy all boxes, replacing st* boxes with new ones
  mp4->seekhdr = newSVpv("", 0);

  PerlIO_seek(mp4->infile, 0, SEEK_SET);

  // XXX this is ugly, because we are reading a second time we have to reset
  // various things in the mp4 struct
  Newz(0, mp4->buf, 1, Buffer);
  buffer_init(mp4->buf, MP4_BLOCK_SIZE);

  mp4->audio_offset = 0;
  mp4->current_track = 0;
  mp4->track_count = 0;

  // free seek structs because we will be reading them a second time
  if (mp4->time_to_sample) Safefree(mp4->time_to_sample);
  if (mp4->sample_to_chunk) Safefree(mp4->sample_to_chunk);
  if (mp4->sample_byte_size) Safefree(mp4->sample_byte_size);
  if (mp4->chunk_offset) Safefree(mp4->chunk_offset);

  mp4->time_to_sample = NULL;
  mp4->sample_to_chunk = NULL;
  mp4->sample_byte_size = NULL;
  mp4->chunk_offset = NULL;

  while ( (box_size = _mp4_read_box(mp4)) > 0 ) {
    mp4->audio_offset += box_size;
    DEBUG_TRACE("seek pass 2: read box of size %d\n", box_size);

    if (mp4->audio_offset >= mp4->file_size)
      break;
  }

  my_hv_store( info, "seek_offset", newSVuv(file_offset) );
  my_hv_store( info, "seek_header", mp4->seekhdr );

  if (mp4->buf) {
    buffer_free(mp4->buf);
    Safefree(mp4->buf);
  }

out:
  // Don't leak
  SvREFCNT_dec(tags);

  if (mp4->new_stts) SvREFCNT_dec(mp4->new_stts);
  if (mp4->new_stsc) SvREFCNT_dec(mp4->new_stsc);
  if (mp4->new_stsz) SvREFCNT_dec(mp4->new_stsz);
  if (mp4->new_stco) SvREFCNT_dec(mp4->new_stco);

  // free seek structs
  if (mp4->time_to_sample) Safefree(mp4->time_to_sample);
  if (mp4->sample_to_chunk) Safefree(mp4->sample_to_chunk);
  if (mp4->sample_byte_size) Safefree(mp4->sample_byte_size);
  if (mp4->chunk_offset) Safefree(mp4->chunk_offset);

  // free seek buffer
  buffer_free(&tmp_buf);

  Safefree(mp4);

  if (ret == -1) {
    my_hv_store( info, "seek_offset", newSViv(-1) );
  }

  return ret;
}

// Read every top-level box of the file via _mp4_read_box, filling `info` and
// `tags`, then derive avg_bitrate (when no box supplied one) and the DLNA
// profile.
//
// `seeking` non-zero asks the st* parsers to keep their seek tables.
//
// Returns a newly allocated mp4info.  Its read buffer has already been freed
// (mp4->buf is NULL on return); the caller must Safefree the struct and any
// seek tables it holds.
mp4info *
_mp4_parse(PerlIO *infile, char *file, HV *info, HV *tags, uint8_t seeking)
{
  off_t file_size;
  uint32_t box_size = 0;
  const char *dlna_profile = NULL;

  mp4info *mp4;

  // Newz takes an item count, not a byte count
  Newz(0, mp4, 1, mp4info);
  Newz(0, mp4->buf, 1, Buffer);

  mp4->audio_offset = 0;
  mp4->infile = infile;
  mp4->file = file;
  mp4->info = info;
  mp4->tags = tags;
  mp4->current_track = 0;
  mp4->track_count = 0;
  mp4->seen_moov = 0;
  mp4->seeking = seeking ? 1 : 0;

  mp4->time_to_sample = NULL;
  mp4->sample_to_chunk = NULL;
  mp4->sample_byte_size = NULL;
  mp4->chunk_offset = NULL;

  buffer_init(mp4->buf, MP4_BLOCK_SIZE);

  file_size = _file_size(infile);
  mp4->file_size = file_size;

  my_hv_store( info, "file_size", newSVuv(file_size) );

  // Create empty tracks array
  my_hv_store( info, "tracks", newRV_noinc( (SV *)newAV() ) );

  while ( (box_size = _mp4_read_box(mp4)) > 0 ) {
    mp4->audio_offset += box_size;
    DEBUG_TRACE("read box of size %d / audio_offset %llu\n", box_size, mp4->audio_offset);

    if (mp4->audio_offset >= file_size)
      break;
  }

  // XXX: if no ftyp was found, assume it is brand 'mp41'

  // if no bitrate was found (i.e. ALAC), calculate based on file_size/song_length_ms
  if ( !my_hv_exists(info, "avg_bitrate") ) {
    SV **entry = my_hv_fetch(info, "song_length_ms");
    if (entry) {
      SV **audio_offset = my_hv_fetch(info, "audio_offset");
      if (audio_offset) {
        uint32_t song_length_ms = SvIV(*entry);
        uint32_t bitrate = _bitrate(file_size - SvIV(*audio_offset), song_length_ms);

        my_hv_store( info, "avg_bitrate", newSVuv(bitrate) );
        mp4->bitrate = bitrate;
      }
    }
  }

  // DLNA detection, based on code from libdlna
  // Each case picks at most one profile name; it is stored once below.
  if (!mp4->dlna_invalid && mp4->samplerate && mp4->bitrate && mp4->channels) {
    switch (mp4->audio_object_type) {
      case AAC_LC:
      case AAC_LC_ER:
      {
        if (mp4->samplerate < 8000 || mp4->samplerate > 48000)
          break;

        if (mp4->channels <= 2) {
          if (mp4->bitrate <= 192000)
            dlna_profile = "AAC_ISO_192";
          else if (mp4->bitrate <= 320000)
            dlna_profile = "AAC_ISO_320";
          else if (mp4->bitrate <= 576000)
            dlna_profile = "AAC_ISO";
        }
        else if (mp4->channels <= 6) {
          if (mp4->bitrate <= 1440000)
            dlna_profile = "AAC_MULT5_ISO";
        }

        break;
      }

      case AAC_LTP:
      case AAC_LTP_ER:
      {
        if (mp4->samplerate < 8000)
          break;

        if (mp4->samplerate <= 48000) {
          if (mp4->channels <= 2 && mp4->bitrate <= 576000)
            dlna_profile = "AAC_LTP_ISO";
        }
        else if (mp4->samplerate <= 96000) {
          if (mp4->channels <= 6 && mp4->bitrate <= 2880000)
            dlna_profile = "AAC_LTP_MULT5_ISO";
          else if (mp4->channels <= 8 && mp4->bitrate <= 4032000)
            dlna_profile = "AAC_LTP_MULT7_ISO";
        }

        break;
      }

      case AAC_HE:
      {
        if (mp4->samplerate < 8000)
          break;

        if (mp4->samplerate <= 24000) {
          if (mp4->channels > 2)
            break;

          if (mp4->bitrate <= 128000)
            dlna_profile = "HEAAC_L2_ISO_128";
          else if (mp4->bitrate <= 320000)
            dlna_profile = "HEAAC_L2_ISO_320";
          else if (mp4->bitrate <= 576000)
            dlna_profile = "HEAAC_L2_ISO";
        }
        else if (mp4->samplerate <= 48000) {
          if (mp4->channels <= 2 && mp4->bitrate <= 576000)
            dlna_profile = "HEAAC_L3_ISO";
          else if (mp4->channels <= 6 && mp4->bitrate <= 1440000)
            dlna_profile = "HEAAC_MULT5_ISO";
          else if (mp4->channels <= 8 && mp4->bitrate <= 4032000)
            dlna_profile = "HEAAC_MULT7";
        }
        else if (mp4->samplerate <= 96000) {
          if (mp4->channels <= 8 && mp4->bitrate <= 4032000)
            dlna_profile = "HEAAC_MULT7";
        }

        break;
      }

      case AAC_PARAM_ER:
      case AAC_PS:
      {
        if (mp4->samplerate < 8000)
          break;

        if (mp4->samplerate <= 24000) {
          if (mp4->channels > 2)
            break;

          if (mp4->bitrate <= 128000)
            dlna_profile = "HEAACv2_L2_128";
          else if (mp4->bitrate <= 320000)
            dlna_profile = "HEAACv2_L2_320";
          else if (mp4->bitrate <= 576000)
            dlna_profile = "HEAACv2_L2";
        }
        else if (mp4->samplerate <= 48000) {
          if (mp4->channels <= 2 && mp4->bitrate <= 576000)
            dlna_profile = "HEAACv2_L3";
          else if (mp4->channels <= 6 && mp4->bitrate <= 1440000)
            dlna_profile = "HEAACv2_L4";
          else if (mp4->channels <= 6 && mp4->bitrate <= 2880000)
            dlna_profile = "HEAACv2_MULT5";
          else if (mp4->channels <= 8 && mp4->bitrate <= 4032000)
            dlna_profile = "HEAACv2_MULT7";
        }
        else if (mp4->samplerate <= 96000) {
          if (mp4->channels <= 8 && mp4->bitrate <= 4032000)
            dlna_profile = "HEAACv2_MULT7";
        }

        break;
      }

      case AAC_BSAC_ER:
      {
        if (mp4->samplerate < 16000 || mp4->samplerate > 48000)
          break;

        if (mp4->bitrate > 128000)
          break;

        if (mp4->channels <= 2)
          dlna_profile = "BSAC_ISO";
        else if (mp4->channels <= 6)
          dlna_profile = "BSAC_MULT5_ISO";

        break;
      }

      default:
        break;
    }
  }

  if (dlna_profile) {
    my_hv_store( info, "dlna_profile", newSVpv(dlna_profile, 0) );
  }

  buffer_free(mp4->buf);
  Safefree(mp4->buf);
  mp4->buf = NULL; // don't hand a dangling pointer back to the caller

  return mp4;
}

// Read one box header from the current buffer position and dispatch on its
// fourcc.  When mp4->seekhdr is set (second pass of a seek), the box is also
// copied into seekhdr, with parent-box sizes adjusted and the st* boxes
// replaced by the prepared new_st* versions.
//
// Returns the number of bytes the caller should count for this box (header
// size only for containers, so their children are read as separate boxes),
// or 0 when no further box can be read or a box is invalid.
// NOTE(review): the 64-bit box size is returned through an int, so very
// large boxes are truncated; callers store the result in a uint32_t.
int
_mp4_read_box(mp4info *mp4)
{
  uint64_t size;  // total size of box
  char type[5];
  uint8_t skip = 0;

  mp4->rsize = 0; // remaining size in box

  if ( !_check_buf(mp4->infile, mp4->buf, 16, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  size = buffer_get_int(mp4->buf);
  strncpy( type, (char *)buffer_ptr(mp4->buf), 4 );
  type[4] = '\0';
  buffer_consume(mp4->buf, 4);

  // Check for 64-bit size
  if (size == 1) {
    size = buffer_get_int64(mp4->buf);
    mp4->hsize = 16;
  }
  else {
    // XXX: size == 0 means the box extends to end of file
    mp4->hsize = 8;
  }

  if (size) {
    if (size < mp4->hsize) {
      // A box cannot be smaller than its own header; the subtraction below
      // would wrap around to a huge remaining size
      DEBUG_TRACE("%s has invalid size %llu\n", type, size);
      return 0;
    }

    mp4->rsize = size - mp4->hsize;
  }

  mp4->size = size;

  DEBUG_TRACE("%s size %llu\n", type, size);

  if (mp4->seekhdr) {
    // Copy and adjust header if seeking
    char tmp_size[4];

    if (
         FOURCC_EQ(type, "moov")
      || FOURCC_EQ(type, "trak")
      || FOURCC_EQ(type, "mdia")
      || FOURCC_EQ(type, "minf")
      || FOURCC_EQ(type, "stbl")
    ) {
      // Container box, adjust size
      put_u32(tmp_size, size - (mp4->old_st_size - mp4->new_st_size));
      DEBUG_TRACE(" Box is parent of st*, changed size to %llu\n", size - (mp4->old_st_size - mp4->new_st_size));
      sv_catpvn( mp4->seekhdr, tmp_size, 4 );
      sv_catpvn( mp4->seekhdr, type, 4 );
    }
    // Replace st* boxes with our new versions
    else if ( FOURCC_EQ(type, "stts") ) {
      DEBUG_TRACE("adding new stts of size %ld\n", sv_len(mp4->new_stts));
      sv_catsv( mp4->seekhdr, mp4->new_stts );
    }
    else if ( FOURCC_EQ(type, "stsc") ) {
      DEBUG_TRACE("adding new stsc of size %ld\n", sv_len(mp4->new_stsc));
      sv_catsv( mp4->seekhdr, mp4->new_stsc );
    }
    else if ( FOURCC_EQ(type, "stsz") ) {
      DEBUG_TRACE("adding new stsz of size %ld\n", sv_len(mp4->new_stsz));
      sv_catsv( mp4->seekhdr, mp4->new_stsz );
    }
    else if ( FOURCC_EQ(type, "stco") ) {
      DEBUG_TRACE("adding new stco of size %ld\n", sv_len(mp4->new_stco));
      sv_catsv( mp4->seekhdr, mp4->new_stco );
    }
    else {
      // Normal box, copy it
      put_u32(tmp_size, size);
      sv_catpvn( mp4->seekhdr, tmp_size, 4 );
      sv_catpvn( mp4->seekhdr, type, 4 );

      // stsd is special and contains real bytes and is also a container
      if ( FOURCC_EQ(type, "stsd") ) {
        sv_catpvn( mp4->seekhdr, (char *)buffer_ptr(mp4->buf), 8 );
      }
      // mp4a is special, ugh
      else if ( FOURCC_EQ(type, "mp4a") ) {
        sv_catpvn( mp4->seekhdr, (char *)buffer_ptr(mp4->buf), 28 );
      }
      // and so is meta
      else if ( FOURCC_EQ(type, "meta") ) {
        sv_catpvn( mp4->seekhdr, (char *)buffer_ptr(mp4->buf), mp4->meta_size );
      }
      // Copy contents unless it's a container
      else if (
           !FOURCC_EQ(type, "edts")
        && !FOURCC_EQ(type, "dinf")
        && !FOURCC_EQ(type, "udta")
        && !FOURCC_EQ(type, "mdat")
      ) {
        if ( !_check_buf(mp4->infile, mp4->buf, size - 8, MP4_BLOCK_SIZE) ) {
          return 0;
        }

        // XXX find a way to skip udta completely when rewriting seek header
        // to avoid useless copying of artwork. Will require adjusting offsets
        // differently.
        sv_catpvn( mp4->seekhdr, (char *)buffer_ptr(mp4->buf), size - 8 );
      }
    }

    // XXX should probably return size here and avoid reading info a second time
    // or move the header copying code to somewhere else
  }

  if ( FOURCC_EQ(type, "ftyp") ) {
    if ( !_mp4_parse_ftyp(mp4) ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad ftyp box): %s\n", mp4->file);
      return 0;
    }
  }
  else if (
       FOURCC_EQ(type, "moov")
    || FOURCC_EQ(type, "edts")
    || FOURCC_EQ(type, "mdia")
    || FOURCC_EQ(type, "minf")
    || FOURCC_EQ(type, "dinf")
    || FOURCC_EQ(type, "stbl")
    || FOURCC_EQ(type, "udta")
  ) {
    // These boxes are containers for nested boxes, return only the fact that
    // we read the header size of the container
    size = mp4->hsize;
  }
  else if ( FOURCC_EQ(type, "trak") ) {
    // Also a container, but we need to increment track_count too
    size = mp4->hsize;
    mp4->track_count++;
  }
  else if ( FOURCC_EQ(type, "mvhd") ) {
    mp4->seen_moov = 1;

    if ( !_mp4_parse_mvhd(mp4) ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad mvhd box): %s\n", mp4->file);
      return 0;
    }
  }
  else if ( FOURCC_EQ(type, "tkhd") ) {
    if ( !_mp4_parse_tkhd(mp4) ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad tkhd box): %s\n", mp4->file);
      return 0;
    }
  }
  else if ( FOURCC_EQ(type, "mdhd") ) {
    if ( !_mp4_parse_mdhd(mp4) ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad mdhd box): %s\n", mp4->file);
      return 0;
    }
  }
  else if ( FOURCC_EQ(type, "hdlr") ) {
    if ( !_mp4_parse_hdlr(mp4) ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad hdlr box): %s\n", mp4->file);
      return 0;
    }
  }
  else if ( FOURCC_EQ(type, "stsd") ) {
    if ( !_mp4_parse_stsd(mp4) ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad stsd box): %s\n", mp4->file);
      return 0;
    }

    // stsd is a special real box + container, count only the real bytes (8)
    size = 8 + mp4->hsize;
  }
  else if ( FOURCC_EQ(type, "mp4a") ) {
    if ( !_mp4_parse_mp4a(mp4) ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad mp4a box): %s\n", mp4->file);
      return 0;
    }

    // mp4a is a special real box + container, count only the real bytes (28)
    size = 28 + mp4->hsize;
  }
  else if ( FOURCC_EQ(type, "alac") ) {
    if ( !_mp4_parse_alac(mp4) ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad alac box): %s\n", mp4->file);
      return 0;
    }

    // skip rest (alac description)
    mp4->rsize -= 28;
    skip = 1;
  }
  else if ( FOURCC_EQ(type, "drms") ) {
    // Mark encoding
    HV *trackinfo = _mp4_get_current_trackinfo(mp4);

    // trackinfo may be missing (see the check in _mp4_parse_hdlr)
    if (trackinfo) {
      my_hv_store( trackinfo, "encoding", newSVpvn("drms", 4) );
    }

    // Skip rest
    skip = 1;
  }
  else if ( FOURCC_EQ(type, "esds") ) {
    if ( !_mp4_parse_esds(mp4) ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad esds box): %s\n", mp4->file);
      return 0;
    }
  }
  else if ( FOURCC_EQ(type, "stts") ) {
    if ( mp4->seeking && mp4->track_count == 1 ) {
      if ( !_mp4_parse_stts(mp4) ) {
        PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad stts box): %s\n", mp4->file);
        return 0;
      }
      mp4->old_st_size += size;
    }
    else {
      skip = 1;
    }
  }
  else if ( FOURCC_EQ(type, "stsc") ) {
    if ( mp4->seeking && mp4->track_count == 1 ) {
      if ( !_mp4_parse_stsc(mp4) ) {
        PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad stsc box): %s\n", mp4->file);
        return 0;
      }
      mp4->old_st_size += size;
    }
    else {
      skip = 1;
    }
  }
  else if ( FOURCC_EQ(type, "stsz") ) {
    if ( mp4->seeking && mp4->track_count == 1 ) {
      if ( !_mp4_parse_stsz(mp4) ) {
        PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad stsz box): %s\n", mp4->file);
        return 0;
      }
      mp4->old_st_size += size;
    }
    else {
      skip = 1;
    }
  }
  else if ( FOURCC_EQ(type, "stco") ) {
    if ( mp4->seeking && mp4->track_count == 1 ) {
      if ( !_mp4_parse_stco(mp4) ) {
        PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad stco box): %s\n", mp4->file);
        return 0;
      }
      mp4->old_st_size += size;
    }
    else {
      skip = 1;
    }
  }
  else if ( FOURCC_EQ(type, "meta") ) {
    uint8_t meta_size = _mp4_parse_meta(mp4);
    if ( !meta_size ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad meta box): %s\n", mp4->file);
      return 0;
    }

    mp4->meta_size = meta_size;

    // meta is a special real box + container, count only the real bytes
    size = meta_size + mp4->hsize;
  }
  else if ( FOURCC_EQ(type, "ilst") ) {
    if ( !_mp4_parse_ilst(mp4) ) {
      PerlIO_printf(PerlIO_stderr(), "Invalid MP4 file (bad ilst box): %s\n", mp4->file);
      return 0;
    }
  }
  else if ( FOURCC_EQ(type, "mdat") ) {
    // Audio data here, there may be boxes after mdat, so we have to skip it
    skip = 1;

    // If we haven't seen moov yet, set a flag so we can print a warning
    // or handle it some other way
    if ( !mp4->seen_moov ) {
      my_hv_store( mp4->info, "leading_mdat", newSVuv(1) );
      mp4->dlna_invalid = 1; // DLNA 8.6.34.8, moov must be before mdat
    }

    // Record audio offset and length
    my_hv_store( mp4->info, "audio_offset", newSVuv(mp4->audio_offset) );
    my_hv_store( mp4->info, "audio_size", newSVuv(size) );
    mp4->audio_size = size;
  }
  else {
    DEBUG_TRACE(" Unhandled box, skipping\n");
    skip = 1;
  }

  if (skip) {
    _mp4_skip(mp4, mp4->rsize);
  }

  return size;
}

// Parse the ftyp box: stores major_brand, minor_version and the list of
// compatible_brands in info.  Returns 1 on success, 0 if the box is too
// short or its brand list is not a whole number of fourccs.
uint8_t
_mp4_parse_ftyp(mp4info *mp4)
{
  AV *compatible_brands;

  if ( !_check_buf(mp4->infile, mp4->buf, mp4->rsize, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  // major_brand + minor_version need 8 bytes; without this check the
  // subtraction below wraps and the brand loop runs off the buffer
  if (mp4->rsize < 8) {
    return 0;
  }

  my_hv_store( mp4->info, "major_brand", newSVpvn( buffer_ptr(mp4->buf), 4 ) );
  buffer_consume(mp4->buf, 4);

  my_hv_store( mp4->info, "minor_version", newSVuv( buffer_get_int(mp4->buf) ) );

  mp4->rsize -= 8;

  if (mp4->rsize % 4) {
    // invalid ftyp
    return 0;
  }

  // Created only once the box is known to be valid, so the error returns
  // above have nothing to leak
  compatible_brands = newAV();

  while (mp4->rsize > 0) {
    av_push( compatible_brands, newSVpvn( buffer_ptr(mp4->buf), 4 ) );
    buffer_consume(mp4->buf, 4);
    mp4->rsize -= 4;
  }

  my_hv_store( mp4->info, "compatible_brands", newRV_noinc( (SV *)compatible_brands ) );

  return 1;
}

// Parse the mvhd box: stores mv_timescale and song_length_ms in info.
// Returns 1 on success, 0 on a read failure or unknown box version.
uint8_t
_mp4_parse_mvhd(mp4info *mp4)
{
  uint32_t timescale;
  uint8_t version;

  if ( !_check_buf(mp4->infile, mp4->buf, mp4->rsize, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  version = buffer_get_char(mp4->buf);
  buffer_consume(mp4->buf, 3); // flags

  if (version == 0) { // 32-bit values
    // Skip ctime and mtime
    buffer_consume(mp4->buf, 8);

    timescale = buffer_get_int(mp4->buf);
    my_hv_store( mp4->info, "mv_timescale", newSVuv(timescale) );

    my_hv_store( mp4->info, "song_length_ms", newSVuv( _mp4_duration_ms( buffer_get_int(mp4->buf), timescale ) ) );
  }
  else if (version == 1) { // 64-bit values
    // Skip ctime and mtime
    buffer_consume(mp4->buf, 16);

    timescale = buffer_get_int(mp4->buf);
    my_hv_store( mp4->info, "mv_timescale", newSVuv(timescale) );

    my_hv_store( mp4->info, "song_length_ms", newSVuv( _mp4_duration_ms( buffer_get_int64(mp4->buf), timescale ) ) );
  }
  else {
    return 0;
  }

  // Skip rest
  buffer_consume(mp4->buf, 80);

  return 1;
}

// Parse the tkhd box: appends a new track hash (id, duration, and width/
// height when non-zero) to info's "tracks" array and records the id as
// mp4->current_track.  Returns 1 on success, 0 on failure.
uint8_t
_mp4_parse_tkhd(mp4info *mp4)
{
  SV **tracks_ref = my_hv_fetch(mp4->info, "tracks");
  SV **timescale_sv = my_hv_fetch(mp4->info, "mv_timescale");
  AV *tracks;
  HV *trackinfo;
  uint32_t id;
  double width;
  double height;
  uint8_t version;
  uint32_t timescale;

  if (!tracks_ref) {
    return 0;
  }

  tracks = (AV *)SvRV(*tracks_ref);

  // mv_timescale is only present once mvhd has been parsed; fall back to 0
  // (which yields a duration of 0) instead of dereferencing NULL
  timescale = timescale_sv ? SvIV(*timescale_sv) : 0;

  if ( !_check_buf(mp4->infile, mp4->buf, mp4->rsize, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  version = buffer_get_char(mp4->buf);
  buffer_consume(mp4->buf, 3); // flags

  // XXX DLNA Requirement [8.6.34.5]: For the default audio track, "Track_enabled"
  // must be set to the value of 1 in the "flags" field of Track Header Box of the track.

  if (version != 0 && version != 1) {
    return 0;
  }

  // Allocated after the validity checks so error returns do not leak it
  trackinfo = newHV();

  if (version == 0) { // 32-bit values
    // Skip ctime and mtime
    buffer_consume(mp4->buf, 8);

    id = buffer_get_int(mp4->buf);

    my_hv_store( trackinfo, "id", newSVuv(id) );

    // Skip reserved
    buffer_consume(mp4->buf, 4);

    my_hv_store( trackinfo, "duration", newSVuv( _mp4_duration_ms( buffer_get_int(mp4->buf), timescale ) ) );
  }
  else { // version 1, 64-bit values
    // Skip ctime and mtime
    buffer_consume(mp4->buf, 16);

    id = buffer_get_int(mp4->buf);

    my_hv_store( trackinfo, "id", newSVuv(id) );

    // Skip reserved
    buffer_consume(mp4->buf, 4);

    my_hv_store( trackinfo, "duration", newSVuv( _mp4_duration_ms( buffer_get_int64(mp4->buf), timescale ) ) );
  }

  // Skip reserved, layer, alternate_group, volume, reserved, matrix
  buffer_consume(mp4->buf, 52);

  // width/height are fixed-point 16.16
  width = buffer_get_short(mp4->buf);
  width += buffer_get_short(mp4->buf) / 65536.;
  if (width > 0) {
    my_hv_store( trackinfo, "width", newSVnv(width) );
  }

  height = buffer_get_short(mp4->buf);
  height += buffer_get_short(mp4->buf) / 65536.;
  if (height > 0) {
    my_hv_store( trackinfo, "height", newSVnv(height) );
  }

  av_push( tracks, newRV_noinc( (SV *)trackinfo ) );

  // Remember the current track we're dealing with
  mp4->current_track = id;

  return 1;
}

// Parse the mdhd box: stores the media timescale as "samplerate" (in info
// and in mp4->samplerate) and, when mvhd did not already supply it,
// song_length_ms.  Returns 1 on success, 0 on failure.
uint8_t
_mp4_parse_mdhd(mp4info *mp4)
{
  uint32_t timescale;
  uint8_t version;

  if ( !_check_buf(mp4->infile, mp4->buf, mp4->rsize, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  version = buffer_get_char(mp4->buf);
  buffer_consume(mp4->buf, 3); // flags

  if (version == 0) { // 32-bit values
    // Skip ctime and mtime
    buffer_consume(mp4->buf, 8);

    timescale = buffer_get_int(mp4->buf);
    my_hv_store( mp4->info, "samplerate", newSVuv(timescale) );

    // Skip duration, if have song_length_ms from mvhd
    if ( my_hv_exists( mp4->info, "song_length_ms" ) ) {
      buffer_consume(mp4->buf, 4);
    }
    else {
      my_hv_store( mp4->info, "song_length_ms", newSVuv( _mp4_duration_ms( buffer_get_int(mp4->buf), timescale ) ) );
    }
  }
  else if (version == 1) { // 64-bit values
    // Skip ctime and mtime
    buffer_consume(mp4->buf, 16);

    timescale = buffer_get_int(mp4->buf);
    my_hv_store( mp4->info, "samplerate", newSVuv(timescale) );

    // Skip duration, if have song_length_ms from mvhd
    if ( my_hv_exists( mp4->info, "song_length_ms" ) ) {
      buffer_consume(mp4->buf, 8);
    }
    else {
      my_hv_store( mp4->info, "song_length_ms", newSVuv( _mp4_duration_ms( buffer_get_int64(mp4->buf), timescale ) ) );
    }
  }
  else {
    return 0;
  }

  mp4->samplerate = timescale;

  // Skip rest
  buffer_consume(mp4->buf, 4);

  return 1;
}

// Parse the hdlr box: stores handler_type and handler_name on the current
// track.  Returns 1 on success, 0 if there is no current track, the read
// fails, or the box is shorter than its 24 fixed bytes.
uint8_t
_mp4_parse_hdlr(mp4info *mp4)
{
  HV *trackinfo = _mp4_get_current_trackinfo(mp4);
  SV *handler_name;
  const char *name;
  uint64_t name_max;
  uint64_t name_len = 0;

  if (!trackinfo) {
    return 0;
  }

  if ( !_check_buf(mp4->infile, mp4->buf, mp4->rsize, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  // 24 fixed bytes precede the name; a shorter box would make the
  // "rsize - 24" below wrap around
  if (mp4->rsize < 24) {
    return 0;
  }

  // Skip version, flags, pre_defined
  buffer_consume(mp4->buf, 8);

  my_hv_store( trackinfo, "handler_type", newSVpvn( buffer_ptr(mp4->buf), 4 ) );
  buffer_consume(mp4->buf, 4);

  // Skip reserved
  buffer_consume(mp4->buf, 12);

  // The name runs to the first NUL, but never past the end of the box
  name = (const char *)buffer_ptr(mp4->buf);
  name_max = mp4->rsize - 24;
  while (name_len < name_max && name[name_len] != '\0') {
    name_len++;
  }

  handler_name = newSVpvn(name, name_len);
  sv_utf8_decode(handler_name);
  my_hv_store( trackinfo, "handler_name", handler_name );

  buffer_consume(mp4->buf, mp4->rsize - 24);

  return 1;
}

// Consume the 8 fixed bytes of the stsd box (version/flags and entry count).
// The entries themselves are read as nested boxes by _mp4_read_box.
// Returns 1 on success, 0 if the bytes are not available.
uint8_t
_mp4_parse_stsd(mp4info *mp4)
{
  if ( !_check_buf(mp4->infile, mp4->buf, 8, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  // Skip version/flags
  buffer_consume(mp4->buf, 4);

  // entry_count is read to advance the buffer but is not otherwise needed
  (void)buffer_get_int(mp4->buf);

  return 1;
}

// Parse the 28 fixed bytes of an mp4a sample entry: stores encoding,
// channels and bits_per_sample on the current track and channels in
// mp4->channels.  Returns 1 on success, 0 if there is no current track or
// the bytes are not available.
uint8_t
_mp4_parse_mp4a(mp4info *mp4)
{
  HV *trackinfo = _mp4_get_current_trackinfo(mp4);

  // Same guard as _mp4_parse_hdlr: nothing to store into without a track
  if (!trackinfo) {
    return 0;
  }

  if ( !_check_buf(mp4->infile, mp4->buf, 28, MP4_BLOCK_SIZE) ) {
    return 0;
  }

  my_hv_store( trackinfo, "encoding", newSVpvn("mp4a", 4) );

  // Skip reserved
  buffer_consume(mp4->buf, 16);

  mp4->channels = buffer_get_short(mp4->buf);
  my_hv_store( trackinfo, "channels", newSVuv(mp4->channels) );
  my_hv_store( trackinfo, "bits_per_sample", newSVuv( buffer_get_short(mp4->buf) ) );

  // Skip reserved
  buffer_consume(mp4->buf, 4);

  // Skip bogus samplerate
  buffer_consume(mp4->buf, 2);

  // Skip reserved
  buffer_consume(mp4->buf, 2);

  return 1;
}

uint8_t
_mp4_parse_esds(mp4info *mp4) { HV *trackinfo = _mp4_get_current_trackinfo(mp4); uint32_t len = 0; uint32_t avg_bitrate; if ( !_check_buf(mp4->infile, mp4->buf, mp4->rsize, MP4_BLOCK_SIZE) ) { return 0; } // Skip version/flags buffer_consume(mp4->buf, 4); // Public docs on esds are hard to find, this is based on faad // and http://www.geocities.com/xhelmboyx/quicktime/formats/mp4-layout.txt // verify ES_DescrTag if (buffer_get_char(mp4->buf) == 0x03) { // read length if ( _mp4_descr_length(mp4->buf) < 5 + 15 ) { return 0; } // skip 3 bytes buffer_consume(mp4->buf, 3); } else { // skip 2 bytes buffer_consume(mp4->buf, 2); } // verify DecoderConfigDescrTab if (buffer_get_char(mp4->buf) != 0x04) { return 0; } // read length if ( _mp4_descr_length(mp4->buf) < 13 ) { return 0; } // XXX: map to string my_hv_store( trackinfo, "audio_type", newSVuv( buffer_get_char(mp4->buf) ) ); buffer_consume(mp4->buf, 4); my_hv_store( trackinfo, "max_bitrate", newSVuv( buffer_get_int(mp4->buf) ) ); avg_bitrate = buffer_get_int(mp4->buf); if (avg_bitrate) { if ( my_hv_exists(mp4->info, "avg_bitrate") ) { // If there are multiple tracks, just add up the bitrates avg_bitrate += SvIV(*(my_hv_fetch(mp4->info, "avg_bitrate"))); } my_hv_store( mp4->info, "avg_bitrate", newSVuv(avg_bitrate) ); mp4->bitrate = avg_bitrate; } // verify DecSpecificInfoTag if (buffer_get_char(mp4->buf) != 0x05) { return 0; } // Read audio object type // 5 bits, if 0x1F, read 6 more bits len = _mp4_descr_length(mp4->buf); if (len > 0) { uint32_t aot; len *= 8; // count the number of bits left aot = buffer_get_bits(mp4->buf, 5); len -= 5; if ( aot == 0x1F ) { aot = 32 + buffer_get_bits(mp4->buf, 6); len -= 6; } // samplerate: 4 bits // if 0xF, samplerate is next 24 bits // else lookup in samplerate table { uint32_t samplerate = buffer_get_bits(mp4->buf, 4); len -= 4; if (samplerate == 0xF) { // XXX need test file with 24-bit samplerate field samplerate = buffer_get_bits(mp4->buf, 24); len -= 24; } else { samplerate = 
samplerate_table[samplerate]; } // Channel configuration (4 bits) // XXX This is sometimes wrong (1 when it should be 2) mp4->channels = buffer_get_bits(mp4->buf, 4); my_hv_store( trackinfo, "channels", newSVuv(mp4->channels) ); len -= 4; if (aot == AAC_SLS) { // Read some SLS-specific config // bits per sample (3 bits) { 8, 16, 20, 24 } uint8_t bps = buffer_get_bits(mp4->buf, 3); len -= 3; my_hv_store( trackinfo, "bits_per_sample", newSVuv( bps_table[bps] ) ); } else if (aot == AAC_HE || aot == AAC_PS) { // Read extended samplerate info samplerate = buffer_get_bits(mp4->buf, 4); len -= 4; if (samplerate == 0xF) { // XXX need test file with 24-bit samplerate field samplerate = buffer_get_bits(mp4->buf, 24); len -= 24; } else { samplerate = samplerate_table[samplerate]; } } my_hv_store( trackinfo, "samplerate", newSVuv(samplerate) ); mp4->samplerate = samplerate; } my_hv_store( trackinfo, "audio_object_type", newSVuv(aot) ); mp4->audio_object_type = aot; // Skip rest of box buffer_get_bits(mp4->buf, len); } // verify SL config descriptor type tag if (buffer_get_char(mp4->buf) != 0x06) { return 0; } _mp4_descr_length(mp4->buf); // verify SL value if (buffer_get_char(mp4->buf) != 0x02) { return 0; } return 1; } uint8_t _mp4_parse_alac(mp4info *mp4) { HV *trackinfo = _mp4_get_current_trackinfo(mp4); if ( !_check_buf(mp4->infile, mp4->buf, 28, MP4_BLOCK_SIZE) ) { return 0; } my_hv_store( trackinfo, "encoding", newSVpvn("alac", 4) ); // Skip reserved buffer_consume(mp4->buf, 16); mp4->channels = buffer_get_short(mp4->buf); my_hv_store( trackinfo, "channels", newSVuv(mp4->channels) ); my_hv_store( trackinfo, "bits_per_sample", newSVuv( buffer_get_short(mp4->buf) ) ); // Skip reserved buffer_consume(mp4->buf, 4); // Skip bogus samplerate buffer_consume(mp4->buf, 2); // Skip reserved buffer_consume(mp4->buf, 2); return 1; } uint8_t _mp4_parse_stts(mp4info *mp4) { int i; if ( !_check_buf(mp4->infile, mp4->buf, mp4->rsize, MP4_BLOCK_SIZE) ) { return 0; } // Skip version/flags 
buffer_consume(mp4->buf, 4); mp4->num_time_to_samples = buffer_get_int(mp4->buf); DEBUG_TRACE(" num_time_to_samples %d\n", mp4->num_time_to_samples); New(0, mp4->time_to_sample, mp4->num_time_to_samples * sizeof(*mp4->time_to_sample), struct tts ); if ( !mp4->time_to_sample ) { PerlIO_printf(PerlIO_stderr(), "Unable to parse stts: too large\n"); return 0; } for (i = 0; i < mp4->num_time_to_samples; i++) { mp4->time_to_sample[i].sample_count = buffer_get_int(mp4->buf); mp4->time_to_sample[i].sample_duration = buffer_get_int(mp4->buf); DEBUG_TRACE( " sample_count %d sample_duration %d\n", mp4->time_to_sample[i].sample_count, mp4->time_to_sample[i].sample_duration ); } return 1; } uint8_t _mp4_parse_stsc(mp4info *mp4) { int i; if ( !_check_buf(mp4->infile, mp4->buf, mp4->rsize, MP4_BLOCK_SIZE) ) { return 0; } // Skip version/flags buffer_consume(mp4->buf, 4); mp4->num_sample_to_chunks = buffer_get_int(mp4->buf); DEBUG_TRACE(" num_sample_to_chunks %d\n", mp4->num_sample_to_chunks); New(0, mp4->sample_to_chunk, mp4->num_sample_to_chunks * sizeof(*mp4->sample_to_chunk), struct stc ); if ( !mp4->sample_to_chunk ) { PerlIO_printf(PerlIO_stderr(), "Unable to parse stsc: too large\n"); return 0; } for (i = 0; i < mp4->num_sample_to_chunks; i++) { mp4->sample_to_chunk[i].first_chunk = buffer_get_int(mp4->buf); mp4->sample_to_chunk[i].samples_per_chunk = buffer_get_int(mp4->buf); // Skip sample desc index buffer_consume(mp4->buf, 4); DEBUG_TRACE(" first_chunk %d samples_per_chunk %d\n", mp4->sample_to_chunk[i].first_chunk, mp4->sample_to_chunk[i].samples_per_chunk ); } return 1; } uint8_t _mp4_parse_stsz(mp4info *mp4) { int i; if ( !_check_buf(mp4->infile, mp4->buf, mp4->rsize, MP4_BLOCK_SIZE) ) { return 0; } // Skip version/flags buffer_consume(mp4->buf, 4); // Check sample size is 0 if ( buffer_get_int(mp4->buf) != 0 ) { DEBUG_TRACE(" stsz uses fixed sample size\n"); buffer_consume(mp4->buf, 4); return 1; } mp4->num_sample_byte_sizes = buffer_get_int(mp4->buf); DEBUG_TRACE(" 
num_sample_byte_sizes %d\n", mp4->num_sample_byte_sizes); New(0, mp4->sample_byte_size, mp4->num_sample_byte_sizes * sizeof(*mp4->sample_byte_size), uint16_t ); if ( !mp4->sample_byte_size ) { PerlIO_printf(PerlIO_stderr(), "Unable to parse stsz: too large\n"); return 0; } for (i = 0; i < mp4->num_sample_byte_sizes; i++) { uint32_t v = buffer_get_int(mp4->buf); if (v > 0x0000ffff) { DEBUG_TRACE("stsz[%d] > 65 kB (%ld)\n", i, (long)v); return 0; } mp4->sample_byte_size[i] = v; //DEBUG_TRACE(" sample_byte_size %d\n", v); } return 1; } uint8_t _mp4_parse_stco(mp4info *mp4) { int i; if ( !_check_buf(mp4->infile, mp4->buf, mp4->rsize, MP4_BLOCK_SIZE) ) { return 0; } // Skip version/flags buffer_consume(mp4->buf, 4); mp4->num_chunk_offsets = buffer_get_int(mp4->buf); DEBUG_TRACE(" num_chunk_offsets %d\n", mp4->num_chunk_offsets); New(0, mp4->chunk_offset, mp4->num_chunk_offsets * sizeof(*mp4->chunk_offset), uint32_t ); if ( !mp4->chunk_offset ) { PerlIO_printf(PerlIO_stderr(), "Unable to parse stco: too large\n"); return 0; } for (i = 0; i < mp4->num_chunk_offsets; i++) { mp4->chunk_offset[i] = buffer_get_int(mp4->buf); //DEBUG_TRACE(" chunk_offset %d\n", mp4->chunk_offset[i]); } return 1; } uint8_t _mp4_parse_meta(mp4info *mp4) { uint32_t hdlr_size; char type[5]; if ( !_check_buf(mp4->infile, mp4->buf, 12, MP4_BLOCK_SIZE) ) { return 0; } // Skip version/flags buffer_consume(mp4->buf, 4); // Parse/skip meta version of hdlr hdlr_size = buffer_get_int(mp4->buf); strncpy( type, (char *)buffer_ptr(mp4->buf), 4 ); type[4] = '\0'; buffer_consume(mp4->buf, 4); if ( !FOURCC_EQ(type, "hdlr") ) { return 0; } // Skip rest of hdlr if ( !_check_buf(mp4->infile, mp4->buf, hdlr_size - 8, MP4_BLOCK_SIZE) ) { return 0; } buffer_consume(mp4->buf, hdlr_size - 8); return 12 + hdlr_size - 8; } uint8_t _mp4_parse_ilst(mp4info *mp4) { while (mp4->rsize) { uint32_t size; char key[5]; if ( !_check_buf(mp4->infile, mp4->buf, 8, MP4_BLOCK_SIZE) ) { return 0; } DEBUG_TRACE(" ilst rsize %llu\n", 
mp4->rsize); // Read Apple annotation box size = buffer_get_int(mp4->buf); strncpy( key, (char *)buffer_ptr(mp4->buf), 4 ); key[4] = '\0'; buffer_consume(mp4->buf, 4); DEBUG_TRACE(" %s size %d\n", key, size); // Note: extra _check_buf calls in this function and other ilst functions // are to avoid reading in the full size of ilst in the case of large artwork upcase(key); if ( FOURCC_EQ(key, "----") ) { // user-specified key/value pair if ( !_mp4_parse_ilst_custom(mp4, size - 8) ) { return 0; } } else { uint32_t bsize; // Ensure we have 8 bytes if ( !_check_buf(mp4->infile, mp4->buf, 8, MP4_BLOCK_SIZE) ) { return 0; } // Verify data box bsize = buffer_get_int(mp4->buf); DEBUG_TRACE(" box size %d\n", bsize); // Sanity check for bad data size if ( bsize <= size - 8 ) { SV *skey; char *bptr = buffer_ptr(mp4->buf); if ( !FOURCC_EQ(bptr, "data") ) { return 0; } buffer_consume(mp4->buf, 4); skey = newSVpv(key, 0); if ( !_mp4_parse_ilst_data(mp4, bsize - 8, skey) ) { SvREFCNT_dec(skey); return 0; } SvREFCNT_dec(skey); // XXX: bug 14476, files with multiple COVR images aren't handled here, just skipped for now if ( bsize < size - 8 ) { DEBUG_TRACE(" skipping rest of box, %d\n", size - 8 - bsize ); _mp4_skip(mp4, size - 8 - bsize); } } else { DEBUG_TRACE(" invalid data size %d, skipping value\n", bsize); _mp4_skip(mp4, size - 12); } } mp4->rsize -= size; } return 1; } uint8_t _mp4_parse_ilst_data(mp4info *mp4, uint32_t size, SV *key) { uint32_t flags; unsigned char *ckey; SV *value; ckey = (unsigned char *)SvPVX(key); if ( FOURCC_EQ(ckey, "COVR") && _env_true("AUDIO_SCAN_NO_ARTWORK") ) { // Skip artwork if requested and avoid the memory cost value = newSVuv(size - 8); my_hv_store( mp4->tags, "COVR_offset", newSVuv(mp4->audio_offset + (mp4->size - mp4->rsize) + 24) ); _mp4_skip(mp4, size); } else { // Read the full ilst value if ( !_check_buf(mp4->infile, mp4->buf, size, MP4_BLOCK_SIZE) ) { return 0; } // Version(0) + Flags flags = buffer_get_int(mp4->buf); // Skip reserved 
buffer_consume(mp4->buf, 4); DEBUG_TRACE(" flags %d\n", flags); if ( !flags || flags == 21 ) { if ( FOURCC_EQ( SvPVX(key), "TRKN" ) || FOURCC_EQ( SvPVX(key), "DISK" ) ) { // Special case trkn, disk (pair of 16-bit ints) uint16_t num = 0; uint16_t total = 0; buffer_consume(mp4->buf, 2); // padding num = buffer_get_short(mp4->buf); // Total may not always be present if (size > 12) { total = buffer_get_short(mp4->buf); buffer_consume(mp4->buf, size - 14); // optional padding } DEBUG_TRACE(" %d/%d\n", num, total); if (total) { my_hv_store_ent( mp4->tags, key, newSVpvf( "%d/%d", num, total ) ); } else if (num) { my_hv_store_ent( mp4->tags, key, newSVuv(num) ); } return 1; } else if ( FOURCC_EQ( SvPVX(key), "GNRE" ) ) { // Special case genre, 16-bit int as id3 genre code char const *genre_string; uint16_t genre_num = buffer_get_short(mp4->buf); if (genre_num > 0 && genre_num < NGENRES + 1) { genre_string = _id3_genre_index(genre_num - 1); my_hv_store_ent( mp4->tags, key, newSVpv( genre_string, 0 ) ); } return 1; } else { // Other binary type, try to guess type based on size uint32_t dsize = size - 8; if (dsize == 1) { value = newSVuv( buffer_get_char(mp4->buf) ); } else if (dsize == 2) { value = newSVuv( buffer_get_short(mp4->buf) ); } else if (dsize == 4) { value = newSVuv( buffer_get_int(mp4->buf) ); } else if (dsize == 8) { value = newSVuv( buffer_get_int64(mp4->buf) ); } else { value = newSVpvn( buffer_ptr(mp4->buf), dsize ); buffer_consume(mp4->buf, dsize); } } } else { // text data value = newSVpvn( buffer_ptr(mp4->buf), size - 8 ); sv_utf8_decode(value); // strip copyright symbol 0xA9 out of key if ( ckey[0] == 0xA9 ) { ckey++; } DEBUG_TRACE(" %s = %s\n", ckey, SvPVX(value)); buffer_consume(mp4->buf, size - 8); } } // if key exists, create array if ( my_hv_exists( mp4->tags, (char *)ckey ) ) { SV **entry = my_hv_fetch( mp4->tags, (char *)ckey ); if (entry != NULL) { if ( SvROK(*entry) && SvTYPE(SvRV(*entry)) == SVt_PVAV ) { av_push( (AV *)SvRV(*entry), value ); } 
else { // A non-array entry, convert to array. AV *ref = newAV(); av_push( ref, newSVsv(*entry) ); av_push( ref, value ); my_hv_store( mp4->tags, (char *)ckey, newRV_noinc( (SV*)ref ) ); } } } else { my_hv_store( mp4->tags, (char *)ckey, value ); } return 1; } uint8_t _mp4_parse_ilst_custom(mp4info *mp4, uint32_t size) { SV *key = NULL; while (size) { char type[5]; uint32_t bsize; // Ensure we have 8 bytes to get the size and type if ( !_check_buf(mp4->infile, mp4->buf, 8, MP4_BLOCK_SIZE) ) { return 0; } // Read box bsize = buffer_get_int(mp4->buf); strncpy( type, (char *)buffer_ptr(mp4->buf), 4 ); type[4] = '\0'; buffer_consume(mp4->buf, 4); DEBUG_TRACE(" %s size %d\n", type, bsize); if ( FOURCC_EQ(type, "name") ) { // Ensure we have bsize bytes if ( !_check_buf(mp4->infile, mp4->buf, bsize, MP4_BLOCK_SIZE) ) { return 0; } buffer_consume(mp4->buf, 4); // padding key = newSVpvn( buffer_ptr(mp4->buf), bsize - 12); sv_utf8_decode(key); upcase(SvPVX(key)); buffer_consume(mp4->buf, bsize - 12); DEBUG_TRACE(" %s\n", SvPVX(key)); } else if ( FOURCC_EQ(type, "data") ) { if (!key) { // No key yet, data is out of order return 0; } if ( !_mp4_parse_ilst_data(mp4, bsize - 8, key) ) { SvREFCNT_dec(key); return 0; } } else { // skip (mean, or other boxes) if ( !_check_buf(mp4->infile, mp4->buf, bsize - 8, MP4_BLOCK_SIZE) ) { return 0; } buffer_consume(mp4->buf, bsize - 8); } size -= bsize; } SvREFCNT_dec(key); return 1; } HV * _mp4_get_current_trackinfo(mp4info *mp4) { // Return the trackinfo hash for track id == mp4->current_track AV *tracks; HV *trackinfo; int i; SV **entry = my_hv_fetch(mp4->info, "tracks"); if (entry != NULL) { tracks = (AV *)SvRV(*entry); } else { return NULL; } // Find entry for this stream number for (i = 0; av_len(tracks) >= 0 && i <= av_len(tracks); i++) { SV **info = av_fetch(tracks, i, 0); if (info != NULL) { SV **tid; trackinfo = (HV *)SvRV(*info); tid = my_hv_fetch( trackinfo, "id" ); if (tid != NULL) { if ( SvIV(*tid) == mp4->current_track ) { 
return trackinfo; } } } } return NULL; } uint32_t _mp4_descr_length(Buffer *buf) { uint8_t b; uint8_t num_bytes = 0; uint32_t length = 0; do { b = buffer_get_char(buf); num_bytes++; length = (length << 7) | (b & 0x7f); } while ( (b & 0x80) && num_bytes < 4 ); return length; } void _mp4_skip(mp4info *mp4, uint32_t size) { if ( buffer_len(mp4->buf) >= size ) { //buffer_dump(mp4->buf, size); buffer_consume(mp4->buf, size); DEBUG_TRACE(" skipped buffer data size %d\n", size); } else { PerlIO_seek(mp4->infile, size - buffer_len(mp4->buf), SEEK_CUR); buffer_clear(mp4->buf); DEBUG_TRACE(" seeked past %d bytes to %d\n", size, (int)PerlIO_tell(mp4->infile)); } } uint32_t _mp4_samples_in_chunk(mp4info *mp4, uint32_t chunk) { int i; for (i = mp4->num_sample_to_chunks - 1; i >= 0; i--) { if (mp4->sample_to_chunk[i].first_chunk <= chunk) { return mp4->sample_to_chunk[i].samples_per_chunk; } } return mp4->sample_to_chunk[0].samples_per_chunk; } uint32_t _mp4_total_samples(mp4info *mp4) { int i; uint32_t total = 0; for (i = 0; i < mp4->num_time_to_samples; i++) { total += mp4->time_to_sample[i].sample_count; } return total; } uint32_t _mp4_get_sample_duration(mp4info *mp4, uint32_t sample) { int i; uint32_t co = 0; for (i = 0; i < mp4->num_time_to_samples; i++) { uint32_t delta = mp4->time_to_sample[i].sample_count; if (sample < co + delta) { return mp4->time_to_sample[i].sample_duration; } co += delta; } return 0; }