98b95881 by David LaPalomento

Move to a handrolled parser

Split parsing into tokenization and a very liberal parser. After this, an "interpreter" needs to be created to build an object representation of the manifest based on the events emitted by the parser. Higher-level manifest tests are broken until that interpreter is written.
1 parent 9fa0bf90
1 'use strict'; 1 'use strict';
2 2
3 var peg = require('pegjs');
4
5 module.exports = function(grunt) { 3 module.exports = function(grunt) {
6 4
7 // Project configuration. 5 // Project configuration.
...@@ -97,16 +95,9 @@ module.exports = function(grunt) { ...@@ -97,16 +95,9 @@ module.exports = function(grunt) {
97 grunt.loadNpmTasks('grunt-contrib-jshint'); 95 grunt.loadNpmTasks('grunt-contrib-jshint');
98 grunt.loadNpmTasks('grunt-contrib-watch'); 96 grunt.loadNpmTasks('grunt-contrib-watch');
99 97
100 grunt.registerTask('peg', 'generate the manifest parser', function() {
101 var parser = peg.buildParser(grunt.file.read('src/m3u8/m3u8.pegjs'));
102 grunt.file.write('build/m3u8-parser.js',
103 'window.videojs.hls.M3U8Parser = ' + parser.toSource());
104 });
105
106 // Default task. 98 // Default task.
107 grunt.registerTask('default', 99 grunt.registerTask('default',
108 ['peg', 100 ['jshint',
109 'jshint',
110 'qunit', 101 'qunit',
111 'clean', 102 'clean',
112 'concat', 103 'concat',
......
...@@ -6,21 +6,16 @@ ...@@ -6,21 +6,16 @@
6 }, 6 },
7 "license": "Apache 2", 7 "license": "Apache 2",
8 "scripts": { 8 "scripts": {
9 "test": "grunt qunit", 9 "test": "grunt qunit"
10 "prepublish": "npm run peg",
11 "peg": "pegjs -e 'var M3U8Parser = module.exports' src/m3u8/m3u8.pegjs src/m3u8/m3u8-generated.js",
12 "testpeg": "npm run peg && node test/pegtest.js"
13 }, 10 },
14 "devDependencies": { 11 "devDependencies": {
15 "grunt-contrib-jshint": "~0.6.0", 12 "grunt-contrib-jshint": "~0.6.0",
16 "grunt-contrib-qunit": "~0.2.0", 13 "grunt-contrib-qunit": "~0.2.0",
17 "grunt-contrib-concat": "~0.3.0", 14 "grunt-contrib-concat": "~0.3.0",
18 "grunt-contrib-nodeunit": "~0.1.2",
19 "grunt-contrib-uglify": "~0.2.0", 15 "grunt-contrib-uglify": "~0.2.0",
20 "grunt-contrib-watch": "~0.4.0", 16 "grunt-contrib-watch": "~0.4.0",
21 "grunt-contrib-clean": "~0.4.0", 17 "grunt-contrib-clean": "~0.4.0",
22 "grunt": "~0.4.1", 18 "grunt": "~0.4.1"
23 "pegjs": "git+https://github.com/dmajda/pegjs.git"
24 }, 19 },
25 "dependencies": { 20 "dependencies": {
26 "video.js": "~4.2.2", 21 "video.js": "~4.2.2",
......
1 (function(window) {
2 var M3U8 = window.videojs.hls.M3U8;
3
/**
 * Line-oriented M3U8 manifest parser.
 *
 * `parse()` splits the manifest text into lines, identifies each line by the
 * tag prefixes listed in `window.videojs.hls.m3u8TagType`, and fills in a
 * fresh `M3U8` object. An optional `directory` property on the parser
 * instance is prepended to segment URLs that do not contain "http".
 */
window.videojs.hls.M3U8Parser = function() {
  var
    self = this,
    tagTypes = window.videojs.hls.m3u8TagType,
    lines = [],
    data,
    findTag,
    attachTarget,
    parseStreamAttributes;

  // Returns the first known tag prefix that `lineData` starts with, or
  // undefined when the line is missing or is not a recognised tag.
  findTag = function(lineData) {
    var s;
    if (typeof lineData !== 'string') {
      return undefined;
    }
    for (s in tagTypes) {
      if (lineData.indexOf(tagTypes[s]) === 0) {
        return tagTypes[s];
      }
    }
    return undefined;
  };

  /**
   * @param {string} lineData a single manifest line
   * @return {string|undefined} the matching entry of the tag-type table
   */
  self.getTagType = function(lineData) {
    return findTag(lineData);
  };

  /**
   * @param {string} lineData a single manifest line
   * @return {string|undefined} the text following the recognised tag
   * prefix, or undefined when the line is not a recognised tag
   */
  self.getTagValue = function(lineData) {
    var tag = findTag(lineData);
    if (tag !== undefined) {
      return lineData.substr(tag.length);
    }
    return undefined;
  };

  // Looks ahead from the tag at `index` for an optional BYTERANGE line and
  // the URI line that follows, copying them onto `item`. Returns false
  // (leaving `item.url` untouched) when the manifest ends before a URI line,
  // instead of dereferencing a missing line.
  attachTarget = function(item, index) {
    var
      next = lines[index + 1],
      uriIndex = index + 1;

    if (next !== undefined && self.getTagType(next) === tagTypes.BYTERANGE) {
      item.byterange = self.getTagValue(next).split('@');
      uriIndex = index + 2;
    }
    if (lines[uriIndex] === undefined) {
      return false;
    }
    item.url = lines[uriIndex];
    return true;
  };

  // Turns the attribute list of a STREAM-INF line into an object keyed by
  // lower-cased attribute name. Commas inside double quotes do not split an
  // attribute, so quoted lists such as CODECS="a,b" stay intact; fragments
  // without an "=" are ignored rather than dereferenced.
  parseStreamAttributes = function(attributeText) {
    var
      rendition = {},
      pairs = attributeText.match(/[^,=]+=(?:"[^"]*"|[^,]*)/g) || [];

    pairs.forEach(function(pair) {
      var
        separator = pair.indexOf('='),
        key = pair.substr(0, separator).toLowerCase(),
        rawValue = pair.substr(separator + 1),
        dimensions;

      if (!isNaN(rawValue)) {
        rendition[key] = parseInt(rawValue, 10);
        return;
      }

      rendition[key] = rawValue;

      // only a genuine WIDTHxHEIGHT value is treated as a resolution
      dimensions = (/^(\d+)x(\d+)$/).exec(rawValue);
      if (dimensions) {
        rendition.resolution = {
          width: parseInt(dimensions[1], 10),
          height: parseInt(dimensions[2], 10)
        };
      }
    });

    return rendition;
  };

  /**
   * Parse manifest text into an M3U8 object.
   * @param {string} rawDataString the full text of the manifest
   * @return {object} the populated M3U8 object. For empty input the object
   * is still returned, with "Empty Manifest" recorded in `invalidReasons`.
   */
  self.parse = function(rawDataString) {
    data = new M3U8();

    if (self.directory) {
      data.directory = self.directory;
    }

    if (rawDataString === undefined ||
        rawDataString === null ||
        rawDataString.length <= 0) {
      data.invalidReasons.push("Empty Manifest");
      // hand the object back so the caller can actually read the reason
      return data;
    }

    // accept both LF and CRLF line endings
    lines = rawDataString.split(/\r?\n/);

    lines.forEach(function(value, index) {
      var
        segment,
        rendition,
        tagValue = self.getTagValue(value);

      switch (self.getTagType(value)) {
      case tagTypes.EXTM3U:
        // the EXTM3U tag is only valid on the very first line
        data.hasValidM3UTag = (index === 0);
        if (!data.hasValidM3UTag) {
          data.invalidReasons.push("Invalid EXTM3U Tag");
        }
        break;

      case tagTypes.DISCONTINUITY:
        break;

      case tagTypes.PLAYLIST_TYPE:
        if (tagValue === "VOD" || tagValue === "EVENT") {
          data.playlistType = tagValue;
        } else {
          data.invalidReasons.push("Invalid Playlist Type Value");
        }
        break;

      case tagTypes.EXTINF:
        segment = {
          url: "unknown",
          byterange: -1,
          targetDuration: data.targetDuration
        };

        // resolve against the directory only when a URI line was found
        if (attachTarget(segment, index) &&
            segment.url.indexOf("http") === -1 &&
            self.directory) {
          // avoid a doubled slash where directory and url meet
          if (segment.url.charAt(0) === "/" &&
              self.directory.charAt(self.directory.length - 1) === "/") {
            segment.url = segment.url.substr(1);
          }
          segment.url = self.directory + segment.url;
        }
        data.mediaItems.push(segment);
        break;

      case tagTypes.STREAM_INF:
        rendition = parseStreamAttributes(
          value.substr(tagTypes.STREAM_INF.length));
        attachTarget(rendition, index);

        data.isPlaylist = true;
        data.playlistItems.push(rendition);
        break;

      case tagTypes.TARGETDURATION:
        data.targetDuration = parseFloat(tagValue.split(',')[0]);
        break;

      case tagTypes.ZEN_TOTAL_DURATION:
        data.totalDuration = parseFloat(tagValue);
        break;

      case tagTypes.VERSION:
        data.version = parseFloat(tagValue);
        break;

      case tagTypes.MEDIA_SEQUENCE:
        data.mediaSequence = parseInt(tagValue, 10);
        break;

      case tagTypes.ALLOW_CACHE:
        if (tagValue === "YES" || tagValue === "NO") {
          data.allowCache = tagValue;
        } else {
          data.invalidReasons.push("Invalid ALLOW_CACHE Value");
        }
        break;

      case tagTypes.ENDLIST:
        data.hasEndTag = true;
        break;
      }
    });

    return data;
  };
};
151 })(this);
1 (function(parseInt, undefined) {
var Stream, Tokenizer, Parser;

/**
 * A minimal event emitter used as the base of the parsing pipeline.
 * Each object initialised by this constructor gets its own private
 * listener registry.
 */
Stream = function() {
  var listeners = {};

  /**
   * Register a listener for an event type.
   * @param {string} type the event name
   * @param {function} listener invoked with the arguments given to trigger
   */
  this.on = function(type, listener) {
    if (!listeners[type]) {
      listeners[type] = [];
    }
    listeners[type].push(listener);
  };

  /**
   * Remove a previously registered listener.
   * @param {string} type the event name
   * @param {function} listener the function passed to `on`
   * @return {boolean} true if the listener was found and removed
   */
  this.off = function(type, listener) {
    var index;
    if (!listeners[type]) {
      return false;
    }
    index = listeners[type].indexOf(listener);
    // splice(-1, 1) would remove the LAST listener, so bail out when the
    // listener was never registered
    if (index === -1) {
      return false;
    }
    listeners[type].splice(index, 1);
    return true;
  };

  /**
   * Invoke every listener registered for `type`. Any additional arguments
   * are forwarded to the listeners.
   * @param {string} type the event name
   */
  this.trigger = function(type) {
    var callbacks, i, length, args;
    if (!listeners[type]) {
      return;
    }
    // iterate over a snapshot so listeners may call on()/off() while the
    // event is being dispatched
    callbacks = listeners[type].slice();
    args = Array.prototype.slice.call(arguments, 1);
    length = callbacks.length;
    for (i = 0; i < length; ++i) {
      callbacks[i].apply(this, args);
    }
  };
};

/**
 * Forward every 'data' event of this stream to `destination.push`.
 * @param {object} destination an object with a `push(data)` method
 */
Stream.prototype.pipe = function(destination) {
  this.on('data', function(data) {
    destination.push(data);
  });
};

/**
 * Splits incoming text into lines. Text may arrive in arbitrary chunks;
 * a 'data' event is emitted for each complete line, and any trailing
 * partial line is buffered until its newline arrives.
 */
Tokenizer = function() {
  var buffer = '';

  // give this instance its own listener registry instead of sharing the
  // one that lives on the prototype object
  Stream.call(this);

  /**
   * Add text to the tokenizer.
   * @param {string} data the next chunk of manifest text
   */
  this.push = function(data) {
    var nextNewline, line;

    buffer += data;
    nextNewline = buffer.indexOf('\n');

    for (; nextNewline > -1; nextNewline = buffer.indexOf('\n')) {
      line = buffer.substring(0, nextNewline);
      buffer = buffer.substring(nextNewline + 1);
      // lines may be terminated by CRLF as well as a bare LF
      if (line.charAt(line.length - 1) === '\r') {
        line = line.substring(0, line.length - 1);
      }
      this.trigger('data', line);
    }
  };
};
Tokenizer.prototype = new Stream();

/**
 * A liberal line parser. Each line pushed in produces at most one 'data'
 * event describing it as a uri, a comment or a tag.
 */
Parser = function() {
  // per-instance listener registry (see Tokenizer)
  Stream.call(this);
};
Parser.prototype = new Stream();

/**
 * Classify a single manifest line and emit the corresponding event.
 * @param {string} line one line of the manifest, without its terminator
 */
Parser.prototype.push = function(line) {
  var match, event;
  if (line.length === 0) {
    // ignore empty lines
    return;
  }

  // URIs
  if (line.charAt(0) !== '#') {
    this.trigger('data', {
      type: 'uri',
      uri: line
    });
    return;
  }

  // Comments
  if (line.indexOf('#EXT') !== 0) {
    this.trigger('data', {
      type: 'comment',
      text: line.slice(1)
    });
    return;
  }

  // Tags
  match = (/^#EXTM3U/).exec(line);
  if (match) {
    this.trigger('data', {
      type: 'tag',
      tagType: 'm3u'
    });
    return;
  }
  match = (/^#EXTINF:?([0-9\.]*)?,?(.*)?$/).exec(line);
  if (match) {
    event = {
      type: 'tag',
      tagType: 'inf'
    };
    if (match[1]) {
      // the pattern admits decimal durations, so keep the fraction
      event.duration = parseFloat(match[1]);
    }
    if (match[2]) {
      event.title = match[2];
    }
    this.trigger('data', event);
    return;
  }

  // unknown tag type: pass along everything after the "#EXT" prefix
  this.trigger('data', {
    type: 'tag',
    data: line.slice(4)
  });
};
117
118 window.videojs.m3u8 = {
119 Tokenizer: Tokenizer,
120 Parser: Parser
121 };
122 })(window.parseInt);
1 /***** Start *****/
2 {
3 function reduce(rest, attr) {
4 return rest.reduce(function(prev, curr) {
5 var p,
6 currentItem = curr.pop();
7 for (p in currentItem) {
8 prev[p] = currentItem[p];
9 };
10 return prev;
11 }, attr);
12 }
13 }
14
// Entry rule: parses one or more lines and merges the per-line result
// objects into a single manifest object.
start
  = tags:lines+ .* {
      // Keys that accumulate into arrays across lines; any other key is a
      // scalar where the last occurrence wins.
      var choices = {
        segments: 1,
        comments: 1,
        playlists: 1
      };
      return tags.reduce(function(obj, tag) {
        for (var p in tag) {
          if (p in choices) {
            if (Object.prototype.toString.call(obj[p]) === '[object Array]') {
              obj[p].push(tag[p]);
            } else {
              obj[p] = [tag[p]];
            }
          } else {
            obj[p] = tag[p];
          }
        }

        // return only after every property has been merged; returning from
        // inside the loop dropped all but the first property and yielded
        // undefined for tags without properties
        return obj;
      }, {});
    }
38
39 lines
40 = comment:comment _ { var obj = {}; obj["comments"] = comment; return obj; }
41 / ! comment tag:tag _ { return tag; }
42
// Any single tag line. PEG choice is ordered and commits to the first
// alternative that matches, so a tag whose literal is a prefix of another
// tag's literal must be listed AFTER the longer one.
tag
  = & comment
  / tag:m3uTag _ { return tag; }
  / tag:extinfTag _ { return tag; }
  / tag:targetDurationTag _ { return tag; }
  / tag:mediaSequenceTag _ { return tag; }
  / tag:keyTag _ { return tag; }
  / tag:programDateTimeTag _ { return tag; }
  / tag:allowCacheTag _ { return tag; }
  / tag:playlistTypeTag _ { return tag; }
  / tag:endlistTag _ { return tag; }
  / tag:mediaTag _ { return tag; }
  / tag:streamInfTag _ { return tag; }
  // must precede discontinuityTag, whose literal is a prefix of this one
  / tag:discontinuitySequenceTag _ { return tag; }
  / tag:discontinuityTag _ { return tag; }
  / tag:iframesOnlyTag _ { return tag; }
  / tag:mapTag _ { return tag; }
  / tag:iframeStreamInf _ { return tag; }
  / tag:startTag _ { return tag; }
  / tag:versionTag _ { return tag; }
63
64 comment "comment"
65 = & "#" ! "#EXT" text:text+ { return text.join(); }
66
67 /***** Tags *****/
68
69 m3uTag
70 = tag:"#EXTM3U" { return {openTag: true}; }
71
72 extinfTag
73 = tag:'#EXTINF' ":" duration:number "," optional:extinfOptionalParts _ url:mediaURL {
74 return {segments: {
75 byterange: optional.byteRange || -1,
76 title: optional.title,
77 targetDuration: duration,
78 url: url
79 }
80 };
81 }
82
83 byteRangeTag
84 = tag:"#EXT-X-BYTERANGE" ":" length:int ("@" offset:int)? { return {length: length, offset: offset}; }
85
86 targetDurationTag
87 = tag:"#EXT-X-TARGETDURATION" ":" seconds:int { return {targetDuration: seconds}; }
88
89 mediaSequenceTag
90 = tag:'#EXT-X-MEDIA-SEQUENCE' ":" sequenceNumber:int { return {mediaSequence: sequenceNumber}; }
91
92 keyTag
93 = tag:'#EXT-X-KEY' ":" attrs:keyAttributes { return {key: attrs}; }
94
95 programDateTimeTag
96 = tag:'#EXT-X-PROGRAM-DATE-TIME' ":" date:date
97
98 allowCacheTag
99 = tag:'#EXT-X-ALLOW-CACHE' ":" answer:answer { return {allowCache: answer}; }
100
101 playlistTypeTag
102 = tag:'#EXT-X-PLAYLIST-TYPE' ":" type:playlistType { return {playlistType: type}; }
103
104 endlistTag
105 = tag:'#EXT-X-ENDLIST' { return {closeTag: true}; }
106
// Alternative rendition declaration; the tag name carries the "-X-" infix
// like every other extension tag in this grammar.
mediaTag
  = tag:'#EXT-X-MEDIA' ":" attrs:mediaAttributes { return {media: attrs}; }
109
110 streamInfTag
111 = tag:'#EXT-X-STREAM-INF' ":" attrs:streamInfAttrs _ url:mediaURL? {
112 return {playlists: {
113 attributes: attrs,
114 url: url
115 }
116 };
117 }
118
119 discontinuityTag
120 = tag:'#EXT-X-DISCONTINUITY'
121
122 discontinuitySequenceTag
123 = tag:'#EXT-X-DISCONTINUITY-SEQUENCE' ":" sequence:int { return {discontinuitySequence: sequence}; }
124
125 iframesOnlyTag
126 = tag:'#EXT-X-I-FRAMES-ONLY'
127
128 mapTag
129 = tag:'#EXT-X-MAP' ":" attrs:mapAttributes { return {map: attrs}; }
130
131 iframeStreamInf
132 = tag:'#EXT-X-I-FRAME-STREAM-INF' ":" attrs:iframeStreamAttrs { return {iframeStream: attrs}; }
133
// Preferred start point of the playlist. Like every other tag it begins
// with "#"; without it the rule could never match a tag line.
startTag
  = tag:'#EXT-X-START' ":" attrs:startAttributes { return {start: attrs}; }
136
137 versionTag
138 = tag:'#EXT-X-VERSION' ":" version:int { return {version: version}; }
139
140 /***** Helpers *****/
141
142 extinfOptionalParts
143 = nonbreakingWhitespace title:text _ byteRange:byteRangeTag? { return {title: title, byteRange: byteRange} }
144 / _ byteRange:byteRangeTag? { return {title: '', byteRange: byteRange}; }
145
146 mediaURL
147 = & tag
148 / ! tag file:[ -~]+ { return file.join(''); }
149
// Comma-separated attribute list of the key tag, folded into one object.
// The repetition must use keyAttribute (not the stream-inf list) so that
// the second and later key attributes are actually consumed.
keyAttributes
  = (attr:keyAttribute rest:(attrSeparator keyAttribute)*) { return reduce(rest, attr); }
  / attr:keyAttribute? { return [attr]; }
153
154 keyAttribute
155 = "METHOD" "=" method:keyMethod { return {keyMethod: method}; }
156 / "URI" "=" uri:quotedString { return {uri: uri}; }
157 / "IV" "=" iv:hexint { return {IV: iv}; }
158 / "KEYFORMAT" "=" keyFormat:quotedString { return {keyFormat: keyFormat}; }
159 / "KEYFORMATVERSIONS" "=" keyFormatVersions:quotedString { return {keyFormatVersions: keyFormatVersions}; }
160
161 keyMethod
162 = "NONE"
163 / "AES-128"
164 / "SAMPLE-AES"
165
166 mediaAttributes
167 = (attr:mediaAttribute rest:(attrSeparator mediaAttribute)*) { return reduce(rest, attr); }
168 / attr:mediaAttribute? { return [attr] }
169
// One attribute of the media tag. Each alternative returns a
// single-property object; labels and the identifiers used in the actions
// must match exactly or the action throws a ReferenceError.
mediaAttribute
  = "TYPE" "=" type:mediaTypes { return {type: type}; }
  / "URI" "=" uri:quotedString { return {uri: uri}; }
  / "GROUP-ID" "=" groupId:quotedString { return {groupId: groupId}; }
  / "LANGUAGE" "=" language:quotedString { return {language: language}; }
  / "ASSOC-LANGUAGE" "=" assocLanguage:quotedString { return {assocLanguage: assocLanguage}; }
  / "NAME" "=" name:quotedString { return {name: name}; }
  / "DEFAULT" "=" def:answer { return {defaultAnswer: def}; }
  / "AUTOSELECT" "=" autoselect:answer { return {autoselect: autoselect}; }
  // the attribute name is FORCED; the result key stays "force"
  / "FORCED" "=" force:answer { return {force: force}; }
  / "INSTREAM-ID" "=" instreamId:quotedString { return {instreamId: instreamId}; }
  / "CHARACTERISTICS" "=" characteristics:quotedString { return {characteristics: characteristics}; }
182
183 streamInfAttrs
184 = (attr:streamInfAttr rest:(attrSeparator streamInfAttr)*) { return reduce(rest, attr); }
185 / attr:streamInfAttr?
186
// One attribute of the stream-inf tag: either an attribute shared with the
// i-frame variant or one of the rendition-group references below.
streamInfAttr
  = streamInfSharedAttr
  / "AUDIO" "=" audio:quotedString { return {audio: audio}; }
  // report the subtitles group under its own key, using its own label
  / "SUBTITLES" "=" subtitles:quotedString { return {subtitles: subtitles}; }
  / "CLOSED-CAPTIONS" "=" captions:"NONE" { return {closedCaptions: captions}; }
  / "CLOSED-CAPTIONS" "=" captions:quotedString { return {closedCaptions: captions}; }
193
194 streamInfSharedAttr
195 = "PROGRAM-ID" "=" programId:int { return {programId: programId}; }
196 / "BANDWIDTH" "=" bandwidth:int { return {bandwidth: bandwidth}; }
197 / "CODECS" "=" codec:quotedString { return {codecs: codec}; }
198 / "RESOLUTION" "=" resolution:resolution { return {resolution: resolution}; }
199 / "VIDEO" "=" video:quotedString { return {video: video}; }
200
201 mapAttributes
202 = (attr:mapAttribute rest:(attrSeparator mapAttribute)*) { return reduce(rest, attr); }
203 / attr:mapAttribute?
204
// One attribute of the map tag. The action must reference the label
// exactly as declared (byteRange); the result key stays "byterange".
mapAttribute
  = "URI" "=" uri:quotedString { return {uri: uri}; }
  / "BYTERANGE" "=" byteRange:quotedString { return {byterange: byteRange}; }
208
209 iframeStreamAttrs
210 = (attr:iframeStreamAttr rest:(attrSeparator iframeStreamAttr)*) { return reduce(rest, attr); }
211 / attr:iframeStreamAttr?
212
213 iframeStreamAttr
214 = streamInfSharedAttr
215 / "URI" "=" uri:quotedString { return {uri: uri}; }
216
217 startAttributes
218 = (attr:startAttribute rest:(attrSeparator startAttribute)*) { return reduce(rest, attr); }
219 / attr:startAttribute?
220
221 startAttribute
222 = "TIME-OFFSET" "=" timeOffset:number { return {timeOffset: timeOffset}; }
223 / "PRECISE" "=" precise:answer { return {precise: precise}; }
224
225 answer "answer"
226 = "YES"
227 / "NO"
228
229 mediaTypes
230 = "AUDIO"
231 / "VIDEO"
232 / "SUBTITLES"
233 / "CLOSED-CAPTIONS"
234
235 playlistType
236 = "EVENT"
237 / "VOD"
238
239 attrSeparator
240 = "," nonbreakingWhitespace { return; }
241
242 /***** Date *****/
243
244 date "date"
245 = year:year "-" month:month "-" day:day "T" time:time timezone:timezone
246
247 year "year"
248 = digit digit digit digit
249
250 month "month"
251 = [01] digit
252
253 day "day"
254 = [0-3] digit
255
256 time "time"
257 = [0-2] digit ":" [0-5] digit ":" [0-5] digit "." digit+
258 / [0-2] digit ":" [0-5] digit ":" [0-5] digit
259 / [0-2] digit ":" [0-5] digit
260
261 timezone "timezone"
262 = [+-] [0-2] digit ":" [0-5] digit
263 / "Z"
264
265 /***** Numbers *****/
266
267 number "number"
268 = parts:(int frac) _ { return parseFloat(parts.join('')); }
269 / parts:(int) _ { return parts; }
270
271 resolution
272 = width:int "x" height:int { return {width: width, height: height}; }
273
274 int
275 = first:digit19 rest:digits { return parseInt(first + rest.join(''), 10); }
276 / digit:digit { return parseInt(digit, 10); }
277 / neg:"-" first:digit19 rest:digits { return parseInt(neg + first + rest.join(''), 10); }
278 / neg:"-" digit:digit { return parseInt(neg + digit, 10); }
279
280 hexint
281 = "0x" hexDigits:hexDigit+ { return '0x' + hexDigits.join(''); }
282 / "0X" hexDigits:hexDigit+ { return '0x' + hexDigits.join(''); }
283
284 frac
285 = dec:"." digits:digits { return dec + digits.join(''); }
286
287 digits
288 = digit+
289
290 digit
291 = [0-9]
292
293 digit19
294 = [1-9]
295
296 hexDigit
297 = [0-9a-fA-F]
298
299 /***** Text *****/
300
301 quotedString
302 = '"' '"' _ { return ""; }
303 / '"' chars:quotedChar+ '"' _ { return chars.join(''); }
304
// A character allowed inside a quoted string: anything except a line break
// or the double quote itself. No fallback to `char` here: `char` matches
// '"', which would let a repetition of this rule consume the closing quote.
quotedChar
  = [^\r\n"]
308
309 text "text"
310 = text:char+ { return text.join(''); }
311
312 char "char"
313 = [ -~]
314
315 _ "whitespace"
316 = whitespace*
317
318 whitespace
319 = [ \t\n\r]
320
321 nonbreakingWhitespace
322 = [ \t]*
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
22 <script src="../src/segment-parser.js"></script> 22 <script src="../src/segment-parser.js"></script>
23 23
24 <!-- M3U8 --> 24 <!-- M3U8 -->
25 <script src="../src/m3u8/m3u8-tokenizer.js"></script>
25 <script src="../src/m3u8/m3u8.js"></script> 26 <script src="../src/m3u8/m3u8.js"></script>
26 <script src="../src/m3u8/m3u8-tag-types.js"></script> 27 <script src="../src/m3u8/m3u8-tag-types.js"></script>
27 <script src="../build/m3u8-parser.js"></script> 28 <script src="../build/m3u8-parser.js"></script>
......