Commit 74845e6

MP4Remuxer: Fill silent frames to workaround a/v unsync issue
1 parent b6819c3 commit 74845e6

1 file changed: +79 -31 lines

src/remux/mp4-remuxer.js

@@ -196,6 +196,7 @@ class MP4Remuxer {
         let samples = track.samples;
         let dtsCorrection = undefined;
         let firstDts = -1, lastDts = -1, lastPts = -1;
+        let refSampleDuration = this._audioMeta.refSampleDuration;
 
         let mpegRawTrack = this._audioMeta.codec === 'mp3' && this._mp3UseMpegAudio;
         let firstSegmentAfterSeek = this._dtsBaseInited && this._audioNextDts === undefined;
@@ -254,10 +255,14 @@
             let firstSampleDts = firstSampleOriginalDts - dtsCorrection;
             let videoSegment = this._videoSegmentInfoList.getLastSegmentBefore(firstSampleOriginalDts);
             if (videoSegment != null && videoSegment.beginDts < firstSampleDts) {
-                let silentFrameDuration = firstSampleDts - videoSegment.beginDts;
-                let frame = this._generateSilentAudio(videoSegment.beginDts, silentFrameDuration);
-                samples.unshift({unit: frame.unit, dts: frame.dts, pts: frame.pts});
-                mdatBytes += frame.unit.byteLength;
+                let silentUnit = AAC.getSilentFrame(this._audioMeta.originalCodec, this._audioMeta.channelCount);
+                if (silentUnit) {
+                    let dts = videoSegment.beginDts;
+                    let silentFrameDuration = firstSampleDts - videoSegment.beginDts;
+                    Log.v(this.TAG, `InsertPrefixSilentAudio: dts: ${dts}, duration: ${silentFrameDuration}`);
+                    samples.unshift({unit: silentUnit, dts: dts, pts: dts});
+                    mdatBytes += silentUnit.byteLength;
+                } // silentUnit == null: Cannot generate, skip
             } else {
                 insertPrefixSilentFrame = false;
             }
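Note on the prefix branch above: instead of the removed _generateSilentAudio() helper (see the last hunk), the commit asks AAC.getSilentFrame(originalCodec, channelCount) for a canned silent unit directly and unshifts it at the video segment's beginDts; no duration is stored here, since the per-sample loop later derives durations from successive dts values. A minimal sketch of that decision, with an illustrative function name and parameters that are not identifiers from this file:

    // Sketch only: build the prefix sample the hunk unshifts, assuming integer-ms dts values.
    // Returns null when audio already starts early enough or no canned silent unit is available.
    function buildPrefixSilentSample(videoSegmentBeginDts, firstSampleDts, silentUnit) {
        if (silentUnit == null || videoSegmentBeginDts >= firstSampleDts) {
            return null;
        }
        // Anchor at the video segment start so the audio segment's beginDts
        // lines up with the video segment's beginDts.
        return {unit: silentUnit, dts: videoSegmentBeginDts, pts: videoSegmentBeginDts};
    }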
@@ -285,10 +290,74 @@
                 if (mp4Samples.length >= 1) { // use second last sample duration
                     sampleDuration = mp4Samples[mp4Samples.length - 1].duration;
                 } else { // the only one sample, use reference sample duration
-                    sampleDuration = Math.floor(this._audioMeta.refSampleDuration);
+                    sampleDuration = Math.floor(refSampleDuration);
                 }
             }
 
+            let needFillSilentFrames = false;
+            let silentFrames = null;
+
+            // Silent frame generation, if large timestamp gap detected
+            if (sampleDuration > refSampleDuration * 1.5 && this._audioMeta.codec !== 'mp3') {
+                // We need to insert silent frames to fill timestamp gap
+                needFillSilentFrames = true;
+                let delta = Math.abs(sampleDuration - refSampleDuration);
+                let frameCount = Math.ceil(delta / refSampleDuration);
+                let currentDts = dts + refSampleDuration; // Notice: in float
+
+                Log.w(this.TAG, 'Large audio timestamp gap detected, may cause AV sync to drift. ' +
+                                'Silent frames will be generated to avoid unsync.\n' +
+                                `dts: ${dts + sampleDuration} ms, expected: ${dts + Math.round(refSampleDuration)} ms, ` +
+                                `delta: ${Math.round(delta)} ms, generate: ${frameCount} frames`);
+
+                let silentUnit = AAC.getSilentFrame(this._audioMeta.originalCodec, this._audioMeta.channelCount);
+                if (silentUnit == null) {
+                    Log.w(this.TAG, 'Unable to generate silent frame for ' +
+                                    `${this._audioMeta.originalCodec} with ${this._audioMeta.channelCount} channels, repeat last frame`);
+                    // Repeat last frame
+                    silentUnit = unit;
+                }
+                silentFrames = [];
+
+                for (let j = 0; j < frameCount; j++) {
+                    let intDts = Math.round(currentDts); // round to integer
+                    if (silentFrames.length > 0) {
+                        // Set previous frame sample duration
+                        let previousFrame = silentFrames[silentFrames.length - 1];
+                        previousFrame.duration = intDts - previousFrame.dts;
+                    }
+                    let frame = {
+                        dts: intDts,
+                        pts: intDts,
+                        cts: 0,
+                        unit: silentUnit,
+                        size: silentUnit.byteLength,
+                        duration: 0, // wait for next sample
+                        originalDts: originalDts,
+                        flags: {
+                            isLeading: 0,
+                            dependsOn: 1,
+                            isDependedOn: 0,
+                            hasRedundancy: 0
+                        }
+                    };
+                    silentFrames.push(frame);
+                    mdatBytes += unit.byteLength;
+                    currentDts += refSampleDuration;
+                }
+
+                // last frame: align end time to next frame dts
+                let lastFrame = silentFrames[silentFrames.length - 1];
+                lastFrame.duration = dts + sampleDuration - lastFrame.dts;
+
+                // silentFrames.forEach((frame) => {
+                //     Log.w(this.TAG, `SilentAudio: dts: ${frame.dts}, duration: ${frame.duration}`);
+                // });
+
+                // Set correct sample duration for current frame
+                sampleDuration = Math.round(refSampleDuration);
+            }
+
             mp4Samples.push({
                 dts: dts,
                 pts: dts,
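The fill arithmetic in the hunk above: a gap is declared when the computed sampleDuration exceeds 1.5 times refSampleDuration (and the codec is not mp3); delta = |sampleDuration - refSampleDuration| is then tiled with ceil(delta / refSampleDuration) silent frames spaced refSampleDuration apart, accumulated in float and rounded per frame, with the last frame stretched to end exactly at dts + sampleDuration and the current frame clamped back to round(refSampleDuration). A self-contained sketch of just that timestamp plan (an illustrative helper, not the remuxer's code), followed by a worked example:

    // Sketch: plan dts/duration for the silent frames that tile one oversized gap.
    // Assumes a millisecond timescale and sampleDuration > refSampleDuration * 1.5,
    // matching the guard in the hunk above (so frameCount is always >= 1).
    function planSilentFrames(dts, sampleDuration, refSampleDuration) {
        let delta = Math.abs(sampleDuration - refSampleDuration);
        let frameCount = Math.ceil(delta / refSampleDuration);
        let plan = [];
        let currentDts = dts + refSampleDuration; // accumulate in float, round per frame
        for (let j = 0; j < frameCount; j++) {
            let intDts = Math.round(currentDts);
            if (plan.length > 0) {
                // the previous frame lasts until this frame starts
                plan[plan.length - 1].duration = intDts - plan[plan.length - 1].dts;
            }
            plan.push({dts: intDts, duration: 0});
            currentDts += refSampleDuration;
        }
        // the last frame ends exactly where the next real sample begins
        plan[plan.length - 1].duration = dts + sampleDuration - plan[plan.length - 1].dts;
        return plan;
    }

    // Example with 44.1 kHz AAC (refSampleDuration = 1024 / 44100 * 1000, about 23.22 ms):
    // planSilentFrames(1000, 120, 23.22) gives frames at dts 1023, 1046, 1070, 1093, 1116
    // with durations 23, 24, 23, 23, 4; the clamped current frame (1000..1023) plus these
    // silent frames then cover the whole span up to 1120 = dts + sampleDuration.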
@@ -304,6 +373,11 @@
                     hasRedundancy: 0
                 }
             });
+
+            if (needFillSilentFrames) {
+                // Silent frames should be inserted after wrong-duration frame
+                mp4Samples.push.apply(mp4Samples, silentFrames);
+            }
         }
 
         // allocate mdatbox
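Ordering detail for the hunk above: the frame whose duration was clamped is pushed first and the silent frames are appended immediately after it, so mp4Samples stays sorted by dts; push.apply is simply an in-place append. A tiny runnable illustration (the sample objects are just the numbers from the worked example above, not real frames):

    // Illustrative only: push.apply appends the array elements in place, preserving order.
    let mp4Samples = [{dts: 1000, duration: 23}]; // the clamped "wrong-duration" frame
    let silentFrames = [{dts: 1023, duration: 23}, {dts: 1046, duration: 24}];
    mp4Samples.push.apply(mp4Samples, silentFrames); // same as mp4Samples.push(...silentFrames)
    console.log(mp4Samples.map((s) => s.dts)); // [ 1000, 1023, 1046 ]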
@@ -387,32 +461,6 @@
         this._onMediaSegment('audio', segment);
     }
 
-    _generateSilentAudio(dts, frameDuration) {
-        Log.v(this.TAG, `GenerateSilentAudio: dts = ${dts}, duration = ${frameDuration}`);
-
-        let unit = AAC.getSilentFrame(this._audioMeta.originalCodec, this._audioMeta.channelCount);
-        if (unit == null) {
-            Log.w(this.TAG, `Cannot generate silent aac frame for channelCount = ${this._audioMeta.channelCount}`);
-            return null;
-        }
-
-        return {
-            unit,
-            dts: dts,
-            pts: dts,
-            cts: 0,
-            size: unit.byteLength,
-            duration: frameDuration,
-            originalDts: dts,
-            flags: {
-                isLeading: 0,
-                dependsOn: 1,
-                isDependedOn: 0,
-                hasRedundancy: 0
-            }
-        };
-    }
-
     _remuxVideo(videoTrack) {
         if (this._videoMeta == null) {
             return;
