From 42328f2004253466a0c436db89ef46f883ae209c Mon Sep 17 00:00:00 2001
From: Mike Melanson <mike@multimedia.cx>
Date: Mon, 23 Sep 2002 03:34:03 +0000
Subject: preliminary support for edit lists in video traks; cleaned up audio
 handling

CVS patchset: 2743
CVS date: 2002/09/23 03:34:03
---
 src/demuxers/demux_qt.c | 302 +++++++++++++++++++++++++++++++++++-------------
 1 file changed, 223 insertions(+), 79 deletions(-)

(limited to 'src')

diff --git a/src/demuxers/demux_qt.c b/src/demuxers/demux_qt.c
index 96f6ad5bb..f6d035469 100644
--- a/src/demuxers/demux_qt.c
+++ b/src/demuxers/demux_qt.c
@@ -30,7 +30,7 @@
  *    build_frame_table
  *  free_qt_info
  *
- * $Id: demux_qt.c,v 1.86 2002/09/22 17:07:52 tmmm Exp $
+ * $Id: demux_qt.c,v 1.87 2002/09/23 03:34:03 tmmm Exp $
  *
  */
 
@@ -96,6 +96,8 @@ typedef unsigned int qt_atom;
 
 #define ESDS_ATOM QT_ATOM('e', 's', 'd', 's')
 
+#define IMA4_FOURCC QT_ATOM('i', 'm', 'a', '4')
+
 /* placeholder for cutting and pasting */
 #define _ATOM QT_ATOM('', '', '', '')
 
@@ -212,6 +214,13 @@ typedef struct {
   void *decoder_config;
   int decoder_config_len;
 
+  /* special audio parameters */
+  unsigned int samples_per_packet;
+  unsigned int bytes_per_packet;
+  unsigned int bytes_per_frame;
+  unsigned int bytes_per_sample;
+  unsigned int samples_per_frame;
+
 } qt_sample_table;
 
 typedef struct {
@@ -220,7 +229,7 @@ typedef struct {
 
   unsigned int creation_time;  /* in ms since Jan-01-1904 */
   unsigned int modification_time;
-  unsigned int time_scale;  /* base clock frequency is Hz */
+  unsigned int timescale;  /* base clock frequency in Hz */
   unsigned int duration;
   off_t input_length;
         
@@ -360,7 +369,7 @@ qt_info *create_qt_info(void) {
 
   info->creation_time = 0;
   info->modification_time = 0;
-  info->time_scale = 0;
+  info->timescale = 0;
   info->duration = 0;
   info->input_length = 0;
 
@@ -424,7 +433,7 @@ static void parse_mvhd_atom(qt_info *info, unsigned char *mvhd_atom) {
 
   info->creation_time = BE_32(&mvhd_atom[0x0C]);
   info->modification_time = BE_32(&mvhd_atom[0x10]);
-  info->time_scale = BE_32(&mvhd_atom[0x14]);
+  info->timescale = BE_32(&mvhd_atom[0x14]);
   info->duration = BE_32(&mvhd_atom[0x18]);
 
 }
@@ -632,6 +641,63 @@ static qt_error parse_trak_atom(qt_sample_table *sample_table,
           BE_16(&trak_atom[i + 0x2C]);
         sample_table->media_description.audio.channels = trak_atom[i + 0x25];
         sample_table->media_description.audio.bits = trak_atom[i + 0x27];
+
+        /* assume uncompressed audio parameters */
+        sample_table->bytes_per_sample =
+          sample_table->media_description.audio.bits / 8;
+        sample_table->samples_per_frame =
+          sample_table->media_description.audio.channels;
+        sample_table->bytes_per_frame = 
+          sample_table->bytes_per_sample * sample_table->samples_per_frame;
+        sample_table->samples_per_packet = sample_table->samples_per_frame;
+        sample_table->bytes_per_packet = sample_table->bytes_per_sample;
+
+        /* special case time: some ima4-encoded files don't have the
+         * extra header; compensate */
+        if (BE_32(&trak_atom[i + 0x10]) == IMA4_FOURCC) {
+          sample_table->samples_per_packet = 64;
+          sample_table->bytes_per_packet = 34;
+          sample_table->bytes_per_frame = 34 * 
+            sample_table->media_description.audio.channels;
+          sample_table->bytes_per_sample = 2;
+          sample_table->samples_per_frame = 64 *
+            sample_table->media_description.audio.channels;
+        }
+
+        /* it's time to dig a little deeper to determine the real audio
+         * properties; if the stsd compressor atom has 0x24 bytes, it
+         * appears to be a handler for uncompressed data; if there are an
+         * extra 0x10 bytes, there are some more useful decoding params */
+        if (BE_32(&trak_atom[i + 0x0C]) > 0x24) {
+
+          if (BE_32(&trak_atom[i + 0x30]))
+            sample_table->samples_per_packet = BE_32(&trak_atom[i + 0x30]);
+          if (BE_32(&trak_atom[i + 0x34]))
+            sample_table->bytes_per_packet = BE_32(&trak_atom[i + 0x34]);
+          if (BE_32(&trak_atom[i + 0x38]))
+            sample_table->bytes_per_frame = BE_32(&trak_atom[i + 0x38]);
+          if (BE_32(&trak_atom[i + 0x3C]))
+            sample_table->bytes_per_sample = BE_32(&trak_atom[i + 0x3C]);
+          sample_table->samples_per_frame =
+            (sample_table->bytes_per_frame / sample_table->bytes_per_packet) *
+            sample_table->samples_per_packet;
+
+        }
+
+/*
+printf("*** audio: %d bits, %d channels, %d Hz\n" \
+    "  %d samples/packet, %d bytes/packet, %d bytes/frame, %d bytes/sample, %d samples/frame\n",
+  sample_table->media_description.audio.bits,
+  sample_table->media_description.audio.channels,
+  sample_table->media_description.audio.sample_rate,
+  sample_table->samples_per_packet,
+  sample_table->bytes_per_packet,
+  sample_table->bytes_per_frame,
+  sample_table->bytes_per_sample,
+  sample_table->samples_per_frame
+);
+*/
+
       }
 
     } else if (current_atom == ESDS_ATOM) {
@@ -815,7 +881,9 @@ static qt_error parse_trak_atom(qt_sample_table *sample_table,
 free_sample_table:
   free(sample_table->edit_list_table);
   free(sample_table->chunk_offset_table);
-  free(sample_table->sample_size_table);
+  /* this pointer might have been set to -1 as a special case */
+  if (sample_table->sample_size_table != (void *)-1)
+    free(sample_table->sample_size_table);
   free(sample_table->sync_sample_table);
   free(sample_table->sample_to_chunk_table);
   free(sample_table->time_to_sample_table);
@@ -824,19 +892,23 @@ free_sample_table:
   return last_error;
 }
 
-static qt_error build_frame_table(qt_sample_table *sample_table) {
+static qt_error build_frame_table(qt_sample_table *sample_table,
+  unsigned int global_timescale) {
 
   int i, j;
   unsigned int frame_counter;
-  unsigned int next_keyframe;
-  unsigned int keyframe_index;
   unsigned int chunk_start, chunk_end;
   unsigned int samples_per_chunk;
   uint64_t current_offset;
   int64_t current_pts;
   unsigned int pts_index;
   unsigned int pts_index_countdown;
-  unsigned int official_audio_byte_counter = 0;
+  unsigned int audio_frame_counter = 0;
+  unsigned int edit_list_media_time;
+  int64_t edit_list_duration;
+  int64_t frame_duration = 0;
+  unsigned int edit_list_index;
+  unsigned int edit_list_pts_counter;
 
   /* AUDIO and OTHER frame types follow the same rules; VIDEO follows a
    * different set */
@@ -850,14 +922,6 @@ static qt_error build_frame_table(qt_sample_table *sample_table) {
     if (!sample_table->frames)
       return QT_NO_MEMORY;
 
-    /* initialize keyframe management */
-    keyframe_index = 0;
-    if (sample_table->sync_sample_table) {
-      next_keyframe = sample_table->sync_sample_table[keyframe_index++] - 1;
-    } else {
-      next_keyframe = 0xFFFFFFFF;  /* this means all frames are key */
-    }
-
     /* initialize more accounting variables */
     frame_counter = 0;
     current_pts = 0;
@@ -900,26 +964,15 @@ static qt_error build_frame_table(qt_sample_table *sample_table) {
               sample_table->sample_size_table[frame_counter];
           }
 
-          /* figure out the keyframe situation for this frame */
-          /* if the next_keyframe is all F's, every frame is a keyframe */
-          if (next_keyframe == 0xFFFFFFFF)
-            sample_table->frames[frame_counter].keyframe = 1;
-          else if (next_keyframe == frame_counter) {
-            sample_table->frames[frame_counter].keyframe = 1;
-            if (keyframe_index < sample_table->sync_sample_count)
-              next_keyframe =
-                sample_table->sync_sample_table[keyframe_index++] - 1;
-            else
-              /* this frame number will hopefully never be reached */
-              next_keyframe = 0xFFFFFFFE;
-          }
-          else
+          /* if there is no stss (sample sync) table, make all of the frames
+           * keyframes; otherwise, clear the keyframe bits for now */
+          if (sample_table->sync_sample_table)
             sample_table->frames[frame_counter].keyframe = 0;
+          else
+            sample_table->frames[frame_counter].keyframe = 1;
 
           /* figure out the pts situation */
           sample_table->frames[frame_counter].pts = current_pts;
-          sample_table->frames[frame_counter].pts *= 90000;
-          sample_table->frames[frame_counter].pts /= sample_table->timescale;
           current_pts +=
             sample_table->time_to_sample_table[pts_index].duration;
           pts_index_countdown--;
@@ -936,6 +989,93 @@ static qt_error build_frame_table(qt_sample_table *sample_table) {
       }
     }
 
+    /* fill in the keyframe information */
+    for (i = 0; i < sample_table->sync_sample_count; i++)
+      sample_table->frames[sample_table->sync_sample_table[i] - 1].keyframe = 1;
+
+    /* initialize edit list considerations */
+    edit_list_index = 0;
+    if (sample_table->edit_list_table) {
+      edit_list_media_time = 
+        sample_table->edit_list_table[edit_list_index].media_time;
+      edit_list_duration = 
+        sample_table->edit_list_table[edit_list_index].track_duration;
+
+      /* duration is in global timescale units; convert to trak timescale */
+      edit_list_duration *= sample_table->timescale;
+      edit_list_duration /= global_timescale;
+
+      edit_list_index++;
+      /* if this is the last edit list entry, don't let the duration
+       * expire (so set it to an absurdly large value) */
+      if (edit_list_index == sample_table->edit_list_count)
+        edit_list_duration = 0xFFFFFFFF;
+//printf ("edit list table exists, initial = %d, %lld\n", edit_list_media_time, edit_list_duration);
+    } else {
+      edit_list_media_time = 0;
+      edit_list_duration = 0xFFFFFFFF;
+//printf ("no edit list table, initial = %d, %lld\n", edit_list_media_time, edit_list_duration);
+    }
+
+    /* fix up pts information w.r.t. the edit list table */
+    edit_list_pts_counter = 0;
+    for (i = 0; i < sample_table->frame_count; i++) {
+
+//printf ("%d: (before) pts = %lld...", i, sample_table->frames[i].pts);
+
+      if (sample_table->frames[i].pts < edit_list_media_time)
+        sample_table->frames[i].pts = edit_list_pts_counter;
+      else {
+        /* this is not strictly correct but seems to work well enough */
+        if (i < sample_table->frame_count)
+          frame_duration = 
+            (sample_table->frames[i + 1].pts - sample_table->frames[i].pts);
+
+//printf ("frame duration = %lld...", frame_duration);
+        sample_table->frames[i].pts = edit_list_pts_counter;
+        edit_list_pts_counter += frame_duration;
+        edit_list_duration -= frame_duration;
+      }
+
+//printf ("(fixup) pts = %lld...", sample_table->frames[i].pts);
+
+      /* reload media time and duration */
+      if (edit_list_duration <= 0) {
+        if ((sample_table->edit_list_table) &&
+            (edit_list_index < sample_table->edit_list_count)) {
+//printf ("edit list index = %d\n", edit_list_index);
+          edit_list_media_time = 
+            sample_table->edit_list_table[edit_list_index].media_time;
+          edit_list_duration = 
+            sample_table->edit_list_table[edit_list_index].track_duration;
+
+          /* duration is in global timescale units; convert to trak timescale */
+          edit_list_duration *= sample_table->timescale;
+          edit_list_duration /= global_timescale;
+
+          edit_list_index++;
+          /* if this is the last edit list entry, don't let the duration
+           * expire (so set it to an absurdly large value) */
+          if (edit_list_index == sample_table->edit_list_count)
+            edit_list_duration = 0xFFFFFFFF;
+//printf ("edit list table exists: %d, %lld\n", edit_list_media_time, edit_list_duration);
+        } else {
+          edit_list_media_time = 0;
+          edit_list_duration = 0xFFFFFFFF;
+//printf ("no edit list table (or expired): %d, %lld\n", edit_list_media_time, edit_list_duration);
+        }
+      }
+
+//printf ("(after) pts = %lld...\n", sample_table->frames[i].pts);
+    }
+
+    /* compute final pts values */
+    for (i = 0; i < sample_table->frame_count; i++) {
+      sample_table->frames[i].pts *= 90000;
+      sample_table->frames[i].pts /= sample_table->timescale;
+//printf (" final pts for sample %d = %lld\n", i, sample_table->frames[i].pts);
+    }
+
   } else {
 
     /* in this case, the total number of frames is equal to the number of
@@ -960,25 +1100,46 @@ static qt_error build_frame_table(qt_sample_table *sample_table) {
              final chunk number (the number of offsets in stco table) */
           chunk_end = sample_table->chunk_offset_count + 1;
 
-        /* iterate through each sample in a chunk */
+        /* iterate through each sample in a chunk and fill in size and
+         * pts information */
         for (j = chunk_start - 1; j < chunk_end - 1; j++) {
-          sample_table->frames[j].official_byte_count =
-            official_audio_byte_counter;
-          official_audio_byte_counter +=
+
+          /* figure out the pts for this chunk */
+          sample_table->frames[j].pts = audio_frame_counter;
+          sample_table->frames[j].pts *= 90000;
+          sample_table->frames[j].pts /= sample_table->timescale;
+
+          /* fetch the alleged chunk size according to the QT header */
+          sample_table->frames[j].size =
             sample_table->sample_to_chunk_table[i].samples_per_chunk;
+
+          /* the chunk size is actually the audio frame count */
+          audio_frame_counter += sample_table->frames[j].size;
+
+          /* compute the actual chunk size */
+          sample_table->frames[j].size =
+            (sample_table->frames[j].size * 
+             sample_table->media_description.audio.channels) /
+             sample_table->samples_per_frame *
+             sample_table->bytes_per_frame;
+
+/*
+printf ("bits = %d, channels = %d, audio_frame_counter = %d, pts = %lld\n",
+  sample_table->media_description.audio.bits,
+  sample_table->media_description.audio.channels,
+  audio_frame_counter, sample_table->frames[j].pts);
+*/
+
         }
       }
     }
 
+    /* fill in the rest of the information for the audio samples */
     for (i = 0; i < sample_table->frame_count; i++) {
       sample_table->frames[i].type = sample_table->type;
       sample_table->frames[i].offset = sample_table->chunk_offset_table[i];
-      sample_table->frames[i].size = 0;  /* temporary, of course */
       sample_table->frames[i].keyframe = 0;
-      if (sample_table->type == MEDIA_AUDIO)
-        sample_table->frames[i].pts =
-        sample_table->sample_size_count;   /* stash away for audio pts calc */
-      else
+      if (sample_table->type != MEDIA_AUDIO)
         sample_table->frames[i].pts = 0;
     }
   }
@@ -1002,9 +1163,6 @@ static void parse_moov_atom(qt_info *info, unsigned char *moov_atom) {
   unsigned int *sample_table_indices;
   unsigned int min_offset_table;
   int64_t min_offset;
-  unsigned int audio_byte_counter;
-  unsigned int total_audio_bytes;
-  int64_t audio_pts_multiplier;
 
   /* make sure this is actually a moov atom */
   if (BE_32(&moov_atom[4]) != MOOV_ATOM) {
@@ -1040,7 +1198,7 @@ static void parse_moov_atom(qt_info *info, unsigned char *moov_atom) {
   info->frame_count = 0;
   for (i = 0; i < sample_table_count; i++) {
 
-    build_frame_table(&sample_tables[i]);
+    build_frame_table(&sample_tables[i], info->timescale);
     info->frame_count += sample_tables[i].frame_count;
 
     /* while traversing tables, look for A/V information */
@@ -1132,35 +1290,6 @@ static void parse_moov_atom(qt_info *info, unsigned char *moov_atom) {
         sample_table_indices[min_offset_table] = info->frame_count;
   }
 
-  /* fill in the missing and incomplete information (pts and frame sizes) */
-  audio_byte_counter = 0;
-  audio_pts_multiplier = 90000;
-  audio_pts_multiplier *= info->duration;
-  audio_pts_multiplier /= info->time_scale;
-  for (i = 0; i < info->frame_count; i++) {
-
-    if (info->frames[i].type == MEDIA_AUDIO) {
-
-      /* finish the pts calculation for this audio frame */
-      /* .pts currently holds the total nubmer of bytes for the stream */
-      total_audio_bytes = info->frames[i].pts;
-      info->frames[i].pts = audio_pts_multiplier;
-      info->frames[i].pts *= info->frames[i].official_byte_count;
-      info->frames[i].pts /= total_audio_bytes;
-
-      /* figure out the audio frame size */
-      if (i < info->frame_count - 1)
-        info->frames[i].size =
-          info->frames[i + 1].offset - info->frames[i].offset;
-      else
-        info->frames[i].size =
-          info->moov_last_offset - info->frames[i].offset;
-
-      audio_byte_counter += info->frames[i].size;
-
-    }
-  }
-
   /* free the temporary tables on the way out */
   for (i = 0; i < sample_table_count; i++) {
     free(sample_tables[i].edit_list_table);
@@ -1321,6 +1450,8 @@ static void *demux_qt_loop (void *this_gen) {
   int64_t last_frame_pts = 0;
   unsigned int i;
   unsigned int remaining_sample_bytes;
+  int edit_list_compensation = 0;
+  int frame_duration;
 
   pthread_mutex_lock( &this->mutex );
 
@@ -1370,6 +1501,19 @@ static void *demux_qt_loop (void *this_gen) {
         this->input->seek(this->input, this->qt->frames[i].offset,
           SEEK_SET);
 
+        /* Due to the edit lists, some successive frames have the same pts
+         * which would ordinarily cause frame_duration to be 0 which can
+         * cause DIV-by-0 errors in the engine. Perform this little trick
+         * to compensate. */
+        frame_duration = this->qt->frames[i].pts - last_frame_pts;
+        if (!frame_duration) {
+          frame_duration = 1;
+          edit_list_compensation++;
+        } else {
+          frame_duration -= edit_list_compensation;
+          edit_list_compensation = 0;
+        }
+
         while (remaining_sample_bytes) {
           buf = this->video_fifo->buffer_pool_alloc (this->video_fifo);
           buf->type = this->qt->video_type;
@@ -1380,7 +1524,7 @@ static void *demux_qt_loop (void *this_gen) {
 
           if (last_frame_pts) {
             buf->decoder_flags |= BUF_FLAG_FRAMERATE;
-            buf->decoder_info[0] = buf->pts - last_frame_pts;
+            buf->decoder_info[0] = frame_duration;
           }
 
           if (remaining_sample_bytes > buf->max_size)
@@ -1606,8 +1750,8 @@ static int demux_qt_start (demux_plugin_t *this_gen,
     xine_log (this->xine, XINE_LOG_MSG,
       _("demux_qt: Apple Quicktime file, %srunning time: %d min, %d sec\n"),
       (this->qt->compressed_header) ? "compressed header, " : "",
-      this->qt->duration / this->qt->time_scale / 60,
-      this->qt->duration / this->qt->time_scale % 60);
+      this->qt->duration / this->qt->timescale / 60,
+      this->qt->duration / this->qt->timescale % 60);
     if (this->qt->video_codec)
       xine_log (this->xine, XINE_LOG_MSG,
         _("demux_qt: '%c%c%c%c' video @ %dx%d\n"),
@@ -1833,7 +1977,7 @@ static int demux_qt_get_stream_length (demux_plugin_t *this_gen) {
 
   demux_qt_t *this = (demux_qt_t *) this_gen;
 
-  return this->qt->duration / this->qt->time_scale;
+  return this->qt->duration / this->qt->timescale;
 }
 
 static void *init_demuxer_plugin (xine_t *xine, void *data) {
-- 
cgit v1.2.3