[utils] Improve and test js_to_json

This commit is contained in:
Philipp Hagemeister 2014-09-30 11:12:59 +02:00
parent 410f3e73ab
commit e7b6d12254
3 changed files with 38 additions and 25 deletions

View File

@ -332,14 +332,28 @@ def test_escape_url(self):
) )
self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
def test_js_to_json_realworld(self):
    """Check js_to_json on object literals taken from real player configs."""
    # Single-quoted keys and values must come out double-quoted, with the
    # surrounding layout left untouched.
    inp = '''{
        'clip':{'provider':'pseudo'}
    }'''
    self.assertEqual(js_to_json(inp), '''{
        "clip":{"provider":"pseudo"}
    }''')
    # The converted text must also be parseable as JSON.
    json.loads(js_to_json(inp))

    # Nested arrays/objects and the bare ``null`` literal.
    inp = '''{
        'playlist':[{'controls':{'all':null}}]
    }'''
    self.assertEqual(js_to_json(inp), '''{
        "playlist":[{"controls":{"all":null}}]
    }''')
    json.loads(js_to_json(inp))
def test_js_to_json_edgecases(self):
    """Check escape handling and pass-through of input that is already JSON."""
    # Unquoted key; single-quoted value mixing escaped quotes, escaped
    # backslashes and a bare double quote.
    escaped_js = "{abc_def:'1\\'\\\\2\\\\\\'3\"4'}"
    self.assertEqual(
        json.loads(js_to_json(escaped_js)),
        {"abc_def": "1'\\2\\'3\"4"})

    # Valid JSON with a bare ``true`` literal must survive conversion.
    already_json = '{"abc": true}'
    self.assertEqual(json.loads(js_to_json(already_json)), {'abc': True})
# Run the test suite when this module is executed directly.
if __name__ == '__main__':
    unittest.main()

View File

@ -334,7 +334,11 @@ def _download_json(self, url_or_request, video_id,
try: try:
return json.loads(json_string) return json.loads(json_string)
except ValueError as ve: except ValueError as ve:
raise ExtractorError('Failed to download JSON', cause=ve) errmsg = '%s: Failed to parse JSON ' % video_id
if fatal:
raise ExtractorError(errmsg, cause=ve)
else:
self.report_warning(errmsg + str(ve))
def report_warning(self, msg, video_id=None): def report_warning(self, msg, video_id=None):
idstr = '' if video_id is None else '%s: ' % video_id idstr = '' if video_id is None else '%s: ' % video_id

View File

@ -1580,29 +1580,24 @@ def strip_jsonp(code):
def js_to_json(code):
    """Convert a JavaScript object/array literal into JSON text.

    The input is scanned token by token and rewritten as follows:

    * double-quoted strings are kept, except that an escaped single quote
      (not valid in JSON) is replaced by a plain single quote;
    * single-quoted strings become double-quoted: escaped single quotes are
      unescaped and bare double quotes are escaped; every other escape
      sequence is passed through unchanged;
    * bare identifiers (typically unquoted keys) are wrapped in double
      quotes, except the literals true, false and null;
    * a trailing comma directly before a closing bracket or brace is dropped.

    Everything else (numbers, punctuation, whitespace) is copied verbatim.

    @param code  JavaScript source of the literal, as a string
    @return      the converted text; it is not validated here, so callers
                 still need json.loads to detect malformed input
    """
    def fix_escape(m):
        # Called for every backslash pair and every bare double quote found
        # inside a string literal's contents.
        esc = m.group(0)
        if esc == '"':
            return '\\"'
        if esc == "\\'":
            return "'"
        return esc

    def fix_token(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        if v == ',':
            # Trailing comma: the lookahead in the pattern guarantees that a
            # closing bracket or brace follows.
            return ''
        if v[0] in ('"', "'"):
            return '"%s"' % re.sub(r'(?s)\\.|"', fix_escape, v[1:-1])
        # Bare identifier
        return '"%s"' % v

    # String literals come first so that nothing inside them (commas,
    # identifiers) is ever treated as a separate token. The lookbehind keeps
    # the exponent of a number such as 1e5 from being quoted as an identifier.
    return re.sub(r'''(?sx)
        "[^"\\]*(?:\\.[^"\\]*)*"|
        '[^'\\]*(?:\\.[^'\\]*)*'|
        ,(?=\s*[\]}])|
        (?<![\w.])[a-zA-Z_][a-zA-Z_0-9]*
        ''', fix_token, code)