From 781cc523af69a98efbd1b93cc89cec76145b8d14 Mon Sep 17 00:00:00 2001
From: Filippo Valsorda - Campagna <filosottile.wiki@gmail.com>
Date: Tue, 10 Apr 2012 18:54:40 +0200
Subject: [PATCH] removed the undocumented HTMLParser.unescape; _unescapeHTML
 now uses the htmlentity_transform regex and replaces the inline copies of
 that regex; fixed a bug in the use of _unescapeHTML (missing leading _, from
 d6a96153471ae7e93693cb4dee46cbec1492af7b)

---
 youtube-dl             | 28 ++++++++++++++--------------
 youtube_dl/__init__.py | 28 ++++++++++++++--------------
 2 files changed, 28 insertions(+), 28 deletions(-)
diff --git a/youtube-dl b/youtube-dl
index 752d762eb..78fb07ea1 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -308,13 +308,13 @@ def clean_html(html):
 	# Strip html tags
 	html = re.sub('<.*?>', '', html)
 	# Replace html entities
-	html = re.sub(ur'(?u)&(.+?);', htmlentity_transform, html)
+	html = _unescapeHTML(html)
 	return html
 
 
 def sanitize_title(utitle):
 	"""Sanitizes a video title so it could be used as part of a filename."""
-	utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
+	utitle = _unescapeHTML(utitle)
 	return utitle.replace(unicode(os.sep), u'%')
 
 
@@ -371,8 +371,8 @@ def _unescapeHTML(s):
 	"""
 	assert type(s) == type(u'')
 
-	htmlParser = HTMLParser.HTMLParser()
-	return htmlParser.unescape(s)
+	result = re.sub(ur'(?u)&(.+?);', htmlentity_transform, s)
+	return result
 
 def _encodeFilename(s):
 	"""
@@ -1324,8 +1324,8 @@ class YoutubeIE(InfoExtractor):
 			end = start + float(dur)
 			start = "%02i:%02i:%02i,%03i" %(start/(60*60), start/60%60, start%60, start%1*1000)
 			end = "%02i:%02i:%02i,%03i" %(end/(60*60), end/60%60, end%60, end%1*1000)
-			caption = re.sub(ur'(?u)&(.+?);', htmlentity_transform, caption)
-			caption = re.sub(ur'(?u)&(.+?);', htmlentity_transform, caption) # double cycle, inentional
+			caption = _unescapeHTML(caption)
+			caption = _unescapeHTML(caption) # double cycle, intentional
 			srt += str(n) + '\n'
 			srt += start + ' --> ' + end + '\n'
 			srt += caption + '\n\n'
@@ -2143,7 +2143,7 @@ class YahooIE(InfoExtractor):
 			self._downloader.trouble(u'ERROR: Unable to extract media URL')
 			return
 		video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
-		video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
+		video_url = _unescapeHTML(video_url)
 
 		try:
 			# Process video information
@@ -3410,11 +3410,11 @@ class EscapistIE(InfoExtractor):
 			return
 
 		descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
-		description = unescapeHTML(descMatch.group(1))
+		description = _unescapeHTML(descMatch.group(1))
 		imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
-		imgUrl = unescapeHTML(imgMatch.group(1))
+		imgUrl = _unescapeHTML(imgMatch.group(1))
 		playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
-		playerUrl = unescapeHTML(playerUrlMatch.group(1))
+		playerUrl = _unescapeHTML(playerUrlMatch.group(1))
 		configUrlMatch = re.search('config=(.*)$', playerUrl)
 		configUrl = urllib2.unquote(configUrlMatch.group(1))
 
@@ -3966,20 +3966,20 @@ class StanfordOpenClassroomIE(InfoExtractor):
 
 			m = re.search('<h1>([^<]+)</h1>', coursepage)
 			if m:
-				info['title'] = unescapeHTML(m.group(1))
+				info['title'] = _unescapeHTML(m.group(1))
 			else:
 				info['title'] = info['id']
 			info['stitle'] = _simplify_title(info['title'])
 
 			m = re.search('<description>([^<]+)</description>', coursepage)
 			if m:
-				info['description'] = unescapeHTML(m.group(1))
+				info['description'] = _unescapeHTML(m.group(1))
 
 			links = _orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage))
 			info['list'] = [
 				{
 					'type': 'reference',
-					'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(vpage),
+					'url': 'http://openclassroom.stanford.edu/MainFolder/' + _unescapeHTML(vpage),
 				}
 					for vpage in links]
 
@@ -4007,7 +4007,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
 			info['list'] = [
 				{
 					'type': 'reference',
-					'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(cpage),
+					'url': 'http://openclassroom.stanford.edu/MainFolder/' + _unescapeHTML(cpage),
 				}
 					for cpage in links]
 
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 752d762eb..78fb07ea1 100755
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -308,13 +308,13 @@ def clean_html(html):
 	# Strip html tags
 	html = re.sub('<.*?>', '', html)
 	# Replace html entities
-	html = re.sub(ur'(?u)&(.+?);', htmlentity_transform, html)
+	html = _unescapeHTML(html)
 	return html
 
 
 def sanitize_title(utitle):
 	"""Sanitizes a video title so it could be used as part of a filename."""
-	utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
+	utitle = _unescapeHTML(utitle)
 	return utitle.replace(unicode(os.sep), u'%')
 
 
@@ -371,8 +371,8 @@ def _unescapeHTML(s):
 	"""
 	assert type(s) == type(u'')
 
-	htmlParser = HTMLParser.HTMLParser()
-	return htmlParser.unescape(s)
+	result = re.sub(ur'(?u)&(.+?);', htmlentity_transform, s)
+	return result
 
 def _encodeFilename(s):
 	"""
@@ -1324,8 +1324,8 @@ def _closed_captions_xml_to_srt(self, xml_string):
 			end = start + float(dur)
 			start = "%02i:%02i:%02i,%03i" %(start/(60*60), start/60%60, start%60, start%1*1000)
 			end = "%02i:%02i:%02i,%03i" %(end/(60*60), end/60%60, end%60, end%1*1000)
-			caption = re.sub(ur'(?u)&(.+?);', htmlentity_transform, caption)
-			caption = re.sub(ur'(?u)&(.+?);', htmlentity_transform, caption) # double cycle, inentional
+			caption = _unescapeHTML(caption)
+			caption = _unescapeHTML(caption) # double cycle, intentional
 			srt += str(n) + '\n'
 			srt += start + ' --> ' + end + '\n'
 			srt += caption + '\n\n'
@@ -2143,7 +2143,7 @@ def _real_extract(self, url, new_video=True):
 			self._downloader.trouble(u'ERROR: Unable to extract media URL')
 			return
 		video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
-		video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
+		video_url = _unescapeHTML(video_url)
 
 		try:
 			# Process video information
@@ -3410,11 +3410,11 @@ def _real_extract(self, url):
 			return
 
 		descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
-		description = unescapeHTML(descMatch.group(1))
+		description = _unescapeHTML(descMatch.group(1))
 		imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
-		imgUrl = unescapeHTML(imgMatch.group(1))
+		imgUrl = _unescapeHTML(imgMatch.group(1))
 		playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
-		playerUrl = unescapeHTML(playerUrlMatch.group(1))
+		playerUrl = _unescapeHTML(playerUrlMatch.group(1))
 		configUrlMatch = re.search('config=(.*)$', playerUrl)
 		configUrl = urllib2.unquote(configUrlMatch.group(1))
 
@@ -3966,20 +3966,20 @@ def _real_extract(self, url):
 
 			m = re.search('<h1>([^<]+)</h1>', coursepage)
 			if m:
-				info['title'] = unescapeHTML(m.group(1))
+				info['title'] = _unescapeHTML(m.group(1))
 			else:
 				info['title'] = info['id']
 			info['stitle'] = _simplify_title(info['title'])
 
 			m = re.search('<description>([^<]+)</description>', coursepage)
 			if m:
-				info['description'] = unescapeHTML(m.group(1))
+				info['description'] = _unescapeHTML(m.group(1))
 
 			links = _orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage))
 			info['list'] = [
 				{
 					'type': 'reference',
-					'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(vpage),
+					'url': 'http://openclassroom.stanford.edu/MainFolder/' + _unescapeHTML(vpage),
 				}
 					for vpage in links]
 
@@ -4007,7 +4007,7 @@ def _real_extract(self, url):
 			info['list'] = [
 				{
 					'type': 'reference',
-					'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(cpage),
+					'url': 'http://openclassroom.stanford.edu/MainFolder/' + _unescapeHTML(cpage),
 				}
 					for cpage in links]