98b95881 by David LaPalomento

Move to a hand-rolled parser

Split parsing into tokenization and a very liberal parser. After this, an "interpreter" needs to be created to build an object representation of the manifest based on the events emitted by the parser. Higher-level manifest tests are broken until that interpreter is written.
1 parent 9fa0bf90
'use strict';
var peg = require('pegjs');
module.exports = function(grunt) {
// Project configuration.
......@@ -97,16 +95,9 @@ module.exports = function(grunt) {
grunt.loadNpmTasks('grunt-contrib-jshint');
grunt.loadNpmTasks('grunt-contrib-watch');
// Custom "peg" task: reads the PEG.js grammar, builds a parser from it and
// writes the generated source to the build directory as a script that assigns
// the parser to window.videojs.hls.M3U8Parser.
grunt.registerTask('peg', 'generate the manifest parser', function() {
  var grammar = grunt.file.read('src/m3u8/m3u8.pegjs'),
      generated = peg.buildParser(grammar);
  grunt.file.write('build/m3u8-parser.js',
                   'window.videojs.hls.M3U8Parser = ' + generated.toSource());
});
// Default task.
grunt.registerTask('default',
['peg',
'jshint',
['jshint',
'qunit',
'clean',
'concat',
......
......@@ -6,21 +6,16 @@
},
"license": "Apache 2",
"scripts": {
"test": "grunt qunit",
"prepublish": "npm run peg",
"peg": "pegjs -e 'var M3U8Parser = module.exports' src/m3u8/m3u8.pegjs src/m3u8/m3u8-generated.js",
"testpeg": "npm run peg && node test/pegtest.js"
"test": "grunt qunit"
},
"devDependencies": {
"grunt-contrib-jshint": "~0.6.0",
"grunt-contrib-qunit": "~0.2.0",
"grunt-contrib-concat": "~0.3.0",
"grunt-contrib-nodeunit": "~0.1.2",
"grunt-contrib-uglify": "~0.2.0",
"grunt-contrib-watch": "~0.4.0",
"grunt-contrib-clean": "~0.4.0",
"grunt": "~0.4.1",
"pegjs": "git+https://github.com/dmajda/pegjs.git"
"grunt": "~0.4.1"
},
"dependencies": {
"video.js": "~4.2.2",
......
(function(window) {
var M3U8 = window.videojs.hls.M3U8;
window.videojs.hls.M3U8Parser = function() {
var
self = this,
tagTypes = window.videojs.hls.m3u8TagType,
lines = [],
data;
// Find the known tag that a manifest line starts with.
//
// lineData: a single line of the manifest.
// Returns the matching value from `tagTypes`, or undefined when the line does
// not start with any known tag or is not a string at all.
self.getTagType = function(lineData) {
  var name;
  // parse() peeks at the lines that follow a tag, which may not exist at the
  // end of the manifest; treat those as "no tag" instead of throwing
  if (typeof lineData !== 'string') {
    return undefined;
  }
  for (name in tagTypes) {
    if (lineData.indexOf(tagTypes[name]) === 0) {
      return tagTypes[name];
    }
  }
};
// Return the text that follows the known tag at the start of a manifest line.
// Yields undefined when the line starts with none of the values in `tagTypes`.
self.getTagValue = function(lineData) {
  var key, prefix;
  for (key in tagTypes) {
    prefix = tagTypes[key];
    if (lineData.indexOf(prefix) !== 0) {
      continue;
    }
    return lineData.slice(prefix.length);
  }
};
// Parse the text of an M3U8 manifest into a new M3U8 object.
//
// rawDataString: the complete manifest text; lines may end in LF or CRLF.
// Returns the populated M3U8 object. Problems found while parsing are
// reported through its `invalidReasons` array; an empty or missing manifest
// yields an object whose only content is the "Empty Manifest" reason.
self.parse = function(rawDataString) {
  // Read the URI announced by the tag on line `index`. An optional
  // #EXT-X-BYTERANGE line between the tag and its URI is stored on `item`.
  // Returns undefined when the manifest ends before a URI line is found.
  var readUri = function(index, item) {
    var next = lines[index + 1];
    if (typeof next === 'string' &&
        self.getTagType(next) === tagTypes.BYTERANGE) {
      item.byterange = self.getTagValue(next).split('@');
      return lines[index + 2];
    }
    return next;
  };

  // Turn the attribute list of an #EXT-X-STREAM-INF tag into a rendition
  // object keyed by the lower-cased attribute names.
  var parseStreamInfAttributes = function(attributeList) {
    var
      rendition = {},
      // commas inside double quotes (CODECS="a,b") belong to the value and
      // must not split the attribute
      pairs = attributeList.match(/(?:[^,"]|"[^"]*")+/g) || [];

    pairs.forEach(function(pair) {
      var separator = pair.indexOf('='), name, rawValue, dimensions;
      if (separator === -1) {
        // malformed attribute without a value: nothing to record
        return;
      }
      name = pair.slice(0, separator).trim().toLowerCase();
      // keep everything after the first "=" so values may contain "="
      rawValue = pair.slice(separator + 1);

      if (rawValue !== '' && !isNaN(rawValue)) {
        rendition[name] = parseInt(rawValue, 10);
        return;
      }
      rendition[name] = rawValue;
      dimensions = (/^(\d+)x(\d+)$/).exec(rawValue);
      if (dimensions) {
        rendition.resolution = {
          width: parseInt(dimensions[1], 10),
          height: parseInt(dimensions[2], 10)
        };
      }
    });
    return rendition;
  };

  data = new M3U8();
  if (self.directory) {
    data.directory = self.directory;
  }
  if (typeof rawDataString !== 'string' || rawDataString.length === 0) {
    data.invalidReasons.push("Empty Manifest");
    // hand the object back so the caller can inspect invalidReasons
    return data;
  }

  lines = rawDataString.split(/\r?\n/);
  lines.forEach(function(value, index) {
    var segment, rendition, tagValue, uri;
    switch (self.getTagType(value)) {
    case tagTypes.EXTM3U:
      // the header is only valid as the very first line
      data.hasValidM3UTag = (index === 0);
      if (!data.hasValidM3UTag) {
        data.invalidReasons.push("Invalid EXTM3U Tag");
      }
      break;
    case tagTypes.DISCONTINUITY:
      break;
    case tagTypes.PLAYLIST_TYPE:
      tagValue = self.getTagValue(value);
      if (tagValue === "VOD" || tagValue === "EVENT") {
        data.playlistType = tagValue;
      } else {
        data.invalidReasons.push("Invalid Playlist Type Value");
      }
      break;
    case tagTypes.EXTINF:
      segment = {
        url: "unknown",
        byterange: -1,
        targetDuration: data.targetDuration
      };
      uri = readUri(index, segment);
      // without a URI line the placeholder url is kept
      if (typeof uri === 'string') {
        if (uri.indexOf("http") === -1 && self.directory) {
          // avoid a doubled "/" when joining directory and segment path
          if (data.directory[data.directory.length - 1] === uri[0] &&
              uri[0] === "/") {
            uri = uri.substr(1);
          }
          uri = self.directory + uri;
        }
        segment.url = uri;
      }
      data.mediaItems.push(segment);
      break;
    case tagTypes.STREAM_INF:
      rendition = parseStreamInfAttributes(
        value.substr(tagTypes.STREAM_INF.length));
      rendition.url = readUri(index, rendition);
      data.isPlaylist = true;
      data.playlistItems.push(rendition);
      break;
    case tagTypes.TARGETDURATION:
      data.targetDuration = parseFloat(self.getTagValue(value).split(',')[0]);
      break;
    case tagTypes.ZEN_TOTAL_DURATION:
      data.totalDuration = parseFloat(self.getTagValue(value));
      break;
    case tagTypes.VERSION:
      data.version = parseFloat(self.getTagValue(value));
      break;
    case tagTypes.MEDIA_SEQUENCE:
      data.mediaSequence = parseInt(self.getTagValue(value), 10);
      break;
    case tagTypes.ALLOW_CACHE:
      tagValue = self.getTagValue(value);
      if (tagValue === "YES" || tagValue === "NO") {
        data.allowCache = tagValue;
      } else {
        data.invalidReasons.push("Invalid ALLOW_CACHE Value");
      }
      break;
    case tagTypes.ENDLIST:
      data.hasEndTag = true;
      break;
    }
  });
  return data;
};
};
})(this);
(function(parseInt, undefined) {
var Stream, Tokenizer, Parser;
// A minimal event emitter used as the base of the tokenizer and parser.
// Listeners are registered per event type with on(), removed with off() and
// invoked, in registration order, by trigger().
Stream = function() {
  var listeners = {};

  // Register `listener` to be called whenever `type` is triggered.
  this.on = function(type, listener) {
    if (!listeners[type]) {
      listeners[type] = [];
    }
    listeners[type].push(listener);
  };

  // Remove a previously registered listener.
  // Returns true if the listener was found and removed, false otherwise.
  this.off = function(type, listener) {
    var index;
    if (!listeners[type]) {
      return false;
    }
    index = listeners[type].indexOf(listener);
    // splice(-1, 1) would drop the last registered listener, so bail out
    // when the listener was never registered
    if (index === -1) {
      return false;
    }
    listeners[type].splice(index, 1);
    return true;
  };

  // Call every listener registered for `type` with the remaining arguments;
  // `this` inside the listener is the stream.
  this.trigger = function(type) {
    var callbacks, i, length, args;
    if (!listeners[type]) {
      return;
    }
    // iterate over a copy so listeners may call on()/off() while notified
    callbacks = listeners[type].slice();
    args = Array.prototype.slice.call(arguments, 1);
    length = callbacks.length;
    for (i = 0; i < length; ++i) {
      callbacks[i].apply(this, args);
    }
  };
};

// Forward every 'data' event of this stream to destination.push().
// Returns the destination so pipes can be chained.
Stream.prototype.pipe = function(destination) {
  this.on('data', function(data) {
    destination.push(data);
  });
  return destination;
};
// Splits incoming text into lines. Text may arrive in arbitrary chunks via
// push(); a 'data' event carrying the line (without its line terminator) is
// triggered for every complete line. An unterminated trailing line stays
// buffered until a later push() completes it.
Tokenizer = function() {
  var buffer = '';

  // Run the Stream constructor on this instance so every tokenizer owns its
  // listeners; relying on the prototype's copy would share them between all
  // tokenizers.
  Stream.call(this);

  // Append `data` to the buffer and emit every complete line it now holds.
  this.push = function(data) {
    var nextNewline, line;
    buffer += data;
    nextNewline = buffer.indexOf('\n');
    for (; nextNewline > -1; nextNewline = buffer.indexOf('\n')) {
      line = buffer.substring(0, nextNewline);
      // consume the line before notifying so the buffer is consistent if a
      // listener pushes more data
      buffer = buffer.substring(nextNewline + 1);
      // lines may be terminated by CRLF; drop the carriage return as well
      if (line.charAt(line.length - 1) === '\r') {
        line = line.slice(0, -1);
      }
      this.trigger('data', line);
    }
  };
};
Tokenizer.prototype = new Stream();
// A liberal, line-oriented M3U8 parser. Feed it one manifest line at a time
// through push(); it triggers a 'data' event describing each non-empty line:
//   { type: 'uri', uri }                          a line not starting with "#"
//   { type: 'comment', text }                     a "#" line that is not a tag
//   { type: 'tag', tagType: 'm3u' }               #EXTM3U
//   { type: 'tag', tagType: 'inf', duration?, title? }   #EXTINF
//   { type: 'tag', data }                         any other #EXT tag
Parser = function() {
  // Run the Stream constructor on this instance so every parser owns its
  // listeners instead of sharing the prototype's.
  Stream.call(this);
};
Parser.prototype = new Stream();

// Classify a single manifest line and emit the matching 'data' event.
Parser.prototype.push = function(line) {
  var match, event;

  // tolerate CRLF-terminated input: "." in the tag patterns below does not
  // match a carriage return
  if (line.charAt(line.length - 1) === '\r') {
    line = line.slice(0, -1);
  }

  if (line.length === 0) {
    // ignore empty lines
    return;
  }

  // URIs
  if (line[0] !== '#') {
    this.trigger('data', {
      type: 'uri',
      uri: line
    });
    return;
  }

  // Comments
  if (line.indexOf('#EXT') !== 0) {
    this.trigger('data', {
      type: 'comment',
      text: line.slice(1)
    });
    return;
  }

  // Tags
  match = /^#EXTM3U/.exec(line);
  if (match) {
    this.trigger('data', {
      type: 'tag',
      tagType: 'm3u'
    });
    return;
  }
  match = (/^#EXTINF:?([0-9\.]*)?,?(.*)?$/).exec(line);
  if (match) {
    event = {
      type: 'tag',
      tagType: 'inf'
    };
    if (match[1]) {
      // durations may be fractional, so keep the decimal part
      event.duration = parseFloat(match[1]);
    }
    if (match[2]) {
      event.title = match[2];
    }
    this.trigger('data', event);
    return;
  }

  // unknown tag type: pass along everything after the "#EXT" prefix
  this.trigger('data', {
    type: 'tag',
    data: line.slice(4, line.length)
  });
};
// Publish the Tokenizer and Parser constructors as window.videojs.m3u8.
window.videojs.m3u8 = {
Tokenizer: Tokenizer,
Parser: Parser
};
})(window.parseInt);
/***** Start *****/
{
// Merge a list of parsed attributes into one object.
//
// rest: array of match arrays (one per repeated "separator attribute" match);
//       the attribute object is the last element of each match array
// attr: the object for the first attribute; it receives the merged
//       properties and is returned
//
// The match arrays are only read, never modified, so parser results that are
// reused elsewhere stay intact.
function reduce(rest, attr) {
  return rest.reduce(function(merged, match) {
    var attribute = match[match.length - 1],
        name;
    for (name in attribute) {
      if (Object.prototype.hasOwnProperty.call(attribute, name)) {
        merged[name] = attribute[name];
      }
    }
    return merged;
  }, attr);
}
}
// Entry rule: parses as many lines as possible, ignores whatever follows and
// folds the per-line results into a single manifest object. Properties named
// segments, comments and playlists are collected into arrays; every other
// property keeps the value of the last line that set it.
start
  = tags:lines+ .* {
      var choices = {
        segments: 1,
        comments: 1,
        playlists: 1
      };
      return tags.reduce(function(obj, tag) {
        var p;
        // lines that produce no object contribute nothing
        if (!tag || typeof tag !== 'object') {
          return obj;
        }
        for (p in tag) {
          if (choices.hasOwnProperty(p)) {
            if (Array.isArray(obj[p])) {
              obj[p].push(tag[p]);
            } else {
              obj[p] = [tag[p]];
            }
          } else {
            obj[p] = tag[p];
          }
        }
        // return after every property was merged so the accumulator is
        // always handed to the next line
        return obj;
      }, {});
    }
// One manifest line plus the whitespace after it: a comment yields an object
// with a "comments" property, anything else is handed to the tag rule.
lines
  = text:comment _ { return {comments: text}; }
  / !comment entry:tag _ { return entry; }
// Ordered choice over every supported tag. Alternatives are tried top to
// bottom, so a tag whose name is a prefix of another tag's name must come
// after the longer one.
tag
  = & comment
  / tag:m3uTag _ { return tag; }
  / tag:extinfTag _ { return tag; }
  / tag:targetDurationTag _ { return tag; }
  / tag:mediaSequenceTag _ { return tag; }
  / tag:keyTag _ { return tag; }
  / tag:programDateTimeTag _ { return tag; }
  / tag:allowCacheTag _ { return tag; }
  / tag:playlistTypeTag _ { return tag; }
  / tag:endlistTag _ { return tag; }
  / tag:mediaTag _ { return tag; }
  / tag:streamInfTag _ { return tag; }
  // must precede discontinuityTag, which matches a prefix of this tag
  / tag:discontinuitySequenceTag _ { return tag; }
  / tag:discontinuityTag _ { return tag; }
  / tag:iframesOnlyTag _ { return tag; }
  / tag:mapTag _ { return tag; }
  / tag:iframeStreamInf _ { return tag; }
  / tag:startTag _ { return tag; }
  / tag:versionTag _ { return tag; }
// A comment is a line that starts with "#" but not with "#EXT"; the result is
// the text of the line. The text rule consumes every printable character up
// to the end of the line, so a single match covers the whole comment.
comment "comment"
  = &"#" !"#EXT" line:text { return line; }
/***** Tags *****/
// Every tag rule returns an object with a single property naming what the tag
// contributes to the manifest; the start rule merges these objects.

m3uTag
  = "#EXTM3U" { return {openTag: true}; }

// A media segment: duration, optional title and byte range, then its URL.
extinfTag
  = '#EXTINF' ":" duration:number "," optional:extinfOptionalParts _ url:mediaURL {
      return {segments: {
          byterange: optional.byteRange || -1,
          title: optional.title,
          targetDuration: duration,
          url: url
        }
      };
    }

// The offset is labelled outside the optional group so the action can see it.
byteRangeTag
  = "#EXT-X-BYTERANGE" ":" length:int offset:("@" position:int { return position; })? {
      return {length: length, offset: offset};
    }

targetDurationTag
  = "#EXT-X-TARGETDURATION" ":" seconds:int { return {targetDuration: seconds}; }

mediaSequenceTag
  = '#EXT-X-MEDIA-SEQUENCE' ":" sequenceNumber:int { return {mediaSequence: sequenceNumber}; }

keyTag
  = '#EXT-X-KEY' ":" attrs:keyAttributes { return {key: attrs}; }

programDateTimeTag
  = '#EXT-X-PROGRAM-DATE-TIME' ":" date:date { return {programDateTime: date}; }

allowCacheTag
  = '#EXT-X-ALLOW-CACHE' ":" answer:answer { return {allowCache: answer}; }

playlistTypeTag
  = '#EXT-X-PLAYLIST-TYPE' ":" type:playlistType { return {playlistType: type}; }

endlistTag
  = '#EXT-X-ENDLIST' { return {closeTag: true}; }

// The tag name defined by the HLS specification is EXT-X-MEDIA.
mediaTag
  = '#EXT-X-MEDIA' ":" attrs:mediaAttributes { return {media: attrs}; }

// A variant stream: its attributes and the URL of its playlist.
streamInfTag
  = '#EXT-X-STREAM-INF' ":" attrs:streamInfAttrs _ url:mediaURL? {
      return {playlists: {
          attributes: attrs,
          url: url
        }
      };
    }

// Returns an object rather than the matched text so the start rule does not
// merge the characters of the tag name into the manifest.
discontinuityTag
  = '#EXT-X-DISCONTINUITY' { return {discontinuity: true}; }

discontinuitySequenceTag
  = '#EXT-X-DISCONTINUITY-SEQUENCE' ":" sequence:int { return {discontinuitySequence: sequence}; }

iframesOnlyTag
  = '#EXT-X-I-FRAMES-ONLY' { return {iframesOnly: true}; }

mapTag
  = '#EXT-X-MAP' ":" attrs:mapAttributes { return {map: attrs}; }

iframeStreamInf
  = '#EXT-X-I-FRAME-STREAM-INF' ":" attrs:iframeStreamAttrs { return {iframeStream: attrs}; }

// Tags always begin with "#".
startTag
  = '#EXT-X-START' ":" attrs:startAttributes { return {start: attrs}; }

versionTag
  = '#EXT-X-VERSION' ":" version:int { return {version: version}; }
/***** Helpers *****/
// The part of an #EXTINF line after the comma: an optional title, optionally
// followed by a byte range tag on the next line.
extinfOptionalParts
  = nonbreakingWhitespace title:text _ byteRange:byteRangeTag? { return {title: title, byteRange: byteRange}; }
  / _ byteRange:byteRangeTag? { return {title: '', byteRange: byteRange}; }

// The URL that follows a tag; matches nothing when another tag comes next.
mediaURL
  = & tag
  / ! tag file:[ -~]+ { return file.join(''); }

// Attribute lists: the first attribute followed by any number of
// "separator attribute" pairs, merged into one object by reduce(). The labels
// sit directly in the sequence that owns the action so the action can use them.
keyAttributes
  = attr:keyAttribute rest:(attrSeparator keyAttribute)* { return reduce(rest, attr); }
  / attr:keyAttribute? { return [attr]; }

keyAttribute
  = "METHOD" "=" method:keyMethod { return {keyMethod: method}; }
  / "URI" "=" uri:quotedString { return {uri: uri}; }
  / "IV" "=" iv:hexint { return {IV: iv}; }
  / "KEYFORMAT" "=" keyFormat:quotedString { return {keyFormat: keyFormat}; }
  / "KEYFORMATVERSIONS" "=" keyFormatVersions:quotedString { return {keyFormatVersions: keyFormatVersions}; }

keyMethod
  = "NONE"
  / "AES-128"
  / "SAMPLE-AES"

mediaAttributes
  = attr:mediaAttribute rest:(attrSeparator mediaAttribute)* { return reduce(rest, attr); }
  / attr:mediaAttribute? { return [attr]; }

mediaAttribute
  = "TYPE" "=" type:mediaTypes { return {type: type}; }
  / "URI" "=" uri:quotedString { return {uri: uri}; }
  / "GROUP-ID" "=" groupId:quotedString { return {groupId: groupId}; }
  / "LANGUAGE" "=" language:quotedString { return {language: language}; }
  / "ASSOC-LANGUAGE" "=" assocLanguage:quotedString { return {assocLanguage: assocLanguage}; }
  / "NAME" "=" name:quotedString { return {name: name}; }
  / "DEFAULT" "=" def:answer { return {defaultAnswer: def}; }
  / "AUTOSELECT" "=" autoselect:answer { return {autoselect: autoselect}; }
  // the attribute is spelled FORCED; the result keeps the existing "force" key
  / "FORCED" "=" force:answer { return {force: force}; }
  / "INSTREAM-ID" "=" instreamId:quotedString { return {instreamId: instreamId}; }
  / "CHARACTERISTICS" "=" characteristics:quotedString { return {characteristics: characteristics}; }

streamInfAttrs
  = attr:streamInfAttr rest:(attrSeparator streamInfAttr)* { return reduce(rest, attr); }
  / attr:streamInfAttr?

streamInfAttr
  = streamInfSharedAttr
  / "AUDIO" "=" audio:quotedString { return {audio: audio}; }
  / "SUBTITLES" "=" subtitles:quotedString { return {subtitles: subtitles}; }
  / "CLOSED-CAPTIONS" "=" captions:"NONE" { return {closedCaptions: captions}; }
  / "CLOSED-CAPTIONS" "=" captions:quotedString { return {closedCaptions: captions}; }

// Attributes shared by #EXT-X-STREAM-INF and #EXT-X-I-FRAME-STREAM-INF.
streamInfSharedAttr
  = "PROGRAM-ID" "=" programId:int { return {programId: programId}; }
  / "BANDWIDTH" "=" bandwidth:int { return {bandwidth: bandwidth}; }
  / "CODECS" "=" codec:quotedString { return {codecs: codec}; }
  / "RESOLUTION" "=" resolution:resolution { return {resolution: resolution}; }
  / "VIDEO" "=" video:quotedString { return {video: video}; }

mapAttributes
  = attr:mapAttribute rest:(attrSeparator mapAttribute)* { return reduce(rest, attr); }
  / attr:mapAttribute?

mapAttribute
  = "URI" "=" uri:quotedString { return {uri: uri}; }
  / "BYTERANGE" "=" byterange:quotedString { return {byterange: byterange}; }

iframeStreamAttrs
  = attr:iframeStreamAttr rest:(attrSeparator iframeStreamAttr)* { return reduce(rest, attr); }
  / attr:iframeStreamAttr?

iframeStreamAttr
  = streamInfSharedAttr
  / "URI" "=" uri:quotedString { return {uri: uri}; }

startAttributes
  = attr:startAttribute rest:(attrSeparator startAttribute)* { return reduce(rest, attr); }
  / attr:startAttribute?

startAttribute
  = "TIME-OFFSET" "=" timeOffset:number { return {timeOffset: timeOffset}; }
  / "PRECISE" "=" precise:answer { return {precise: precise}; }

answer "answer"
  = "YES"
  / "NO"

mediaTypes
  = "AUDIO"
  / "VIDEO"
  / "SUBTITLES"
  / "CLOSED-CAPTIONS"

playlistType
  = "EVENT"
  / "VOD"

// A comma and any spaces or tabs after it; contributes no value.
attrSeparator
  = "," nonbreakingWhitespace { return; }
/***** Date *****/
// Date and time in the shape YYYY-MM-DDThh:mm[:ss[.fraction]] followed by a
// time zone. None of these rules has an action, so each yields the raw match
// structure of its sequence.
date "date"
  = year "-" month "-" day "T" time timezone

year "year"
  = digit digit digit digit

month "month"
  = [01] digit

day "day"
  = [0-3] digit

// Longest form first: with fractional seconds, with seconds, minutes only.
time "time"
  = [0-2] digit ":" [0-5] digit ":" [0-5] digit "." digit+
  / [0-2] digit ":" [0-5] digit ":" [0-5] digit
  / [0-2] digit ":" [0-5] digit

// A numeric offset such as +01:00, or "Z".
timezone "timezone"
  = [+-] [0-2] digit ":" [0-5] digit
  / "Z"
/***** Numbers *****/
// A decimal or integer number followed by optional whitespace.
number "number"
  = pieces:(int frac) _ { return parseFloat(pieces.join('')); }
  / whole:int _ { return whole; }

// WIDTHxHEIGHT, e.g. 640x360.
resolution
  = w:int "x" h:int { return {width: w, height: h}; }

// An optionally negative integer; multi-digit forms start with 1-9.
int
  = lead:digit19 tail:digits { return parseInt(lead + tail.join(''), 10); }
  / single:digit { return parseInt(single, 10); }
  / sign:"-" lead:digit19 tail:digits { return parseInt(sign + lead + tail.join(''), 10); }
  / sign:"-" single:digit { return parseInt(sign + single, 10); }

// A hexadecimal literal; the result always carries a lower-case "0x" prefix.
hexint
  = ("0x" / "0X") nibbles:hexDigit+ { return '0x' + nibbles.join(''); }

// The fractional part of a number, including the leading dot.
frac
  = point:"." decimals:digits { return point + decimals.join(''); }

digits
  = digit+

digit
  = [0-9]

digit19
  = [1-9]

hexDigit
  = [0-9a-fA-F]
/***** Text *****/
// A double-quoted string followed by optional whitespace; the result is the
// text between the quotes.
quotedString
  = '"' '"' _ { return ""; }
  / '"' chars:quotedChar+ '"' _ { return chars.join(''); }

// Any character allowed inside a quoted string. The double quote itself must
// be excluded: repetition is greedy and never gives characters back, so a
// rule that also accepts '"' would swallow the closing quote and make
// quotedString fail.
quotedChar
  = [^\r\n"]

// One or more printable ASCII characters, i.e. the rest of the line.
text "text"
  = text:char+ { return text.join(''); }

// A printable ASCII character (space through tilde).
char "char"
  = [ -~]

_ "whitespace"
  = whitespace*

whitespace
  = [ \t\n\r]

// Spaces and tabs only; never crosses a line break.
nonbreakingWhitespace
  = [ \t]*
......@@ -22,6 +22,7 @@
<script src="../src/segment-parser.js"></script>
<!-- M3U8 -->
<script src="../src/m3u8/m3u8-tokenizer.js"></script>
<script src="../src/m3u8/m3u8.js"></script>
<script src="../src/m3u8/m3u8-tag-types.js"></script>
<script src="../build/m3u8-parser.js"></script>
......