From 6599c72527ca8434589c010c48164494ab4c2469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 22:50:36 +0700 Subject: [PATCH] [tube8] Extract categories and tags (Closes #10579) --- youtube_dl/extractor/tube8.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 4053f6c215..e937b23966 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from ..utils import ( int_or_none, str_to_int, @@ -21,7 +23,13 @@ class Tube8IE(KeezMoviesIE): 'title': 'Kasia music video', 'age_limit': 18, 'duration': 230, + 'categories': ['Teen'], + 'tags': ['dancing'], + }, + 'params': { + 'proxy': '127.0.0.1:8118', } + }, { 'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/', 'only_matching': True, @@ -51,6 +59,17 @@ def _real_extract(self, url): r'(\d+)', webpage, 'comment count', fatal=False)) + category = self._search_regex( + r'Category:\s*\s*]+href=[^>]+>([^<]+)', + webpage, 'category', fatal=False) + categories = [category] if category else None + + tags_str = self._search_regex( + r'(?s)Tags:\s*(.+?)]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None + info.update({ 'description': description, 'uploader': uploader, @@ -58,6 +77,8 @@ def _real_extract(self, url): 'like_count': like_count, 'dislike_count': dislike_count, 'comment_count': comment_count, + 'categories': categories, + 'tags': tags, }) return info