pyvideo · redapple · Nov 18, 2015 · Nov 18, 2015 · Nov 18, 2015 · Nov 18, 2015
diff --git a/steve/scrapers.py b/steve/scrapers.py
@@ -6,10 +6,10 @@
 # license.
 #######################################################################
 
-import json
-import subprocess
 from datetime import datetime
 
+import youtube_dl
+
 from steve.util import is_youtube
 
 
@@ -27,7 +27,7 @@ class YoutubeScraper(object):
     def transform_item(self, item):
         """Converts youtube-dl output to richard fields"""
         return {
-            'title': item['fulltitle'],
+            'title': item.get('fulltitle') or item['title'],
             'summary': item['description'],
             'description': '',
             'state': 2,  # Draft
@@ -41,7 +41,7 @@ def transform_item(self, item):
             'whiteboard': '',
             'recorded': datetime.strptime(item['upload_date'], '%Y%m%d'),
             'slug': '',
-            'tags': item['categories'],
+            'tags': (item.get('categories') or []) + (item.get('tags') or []),
             'speakers': []
         }
 
@@ -50,23 +50,16 @@ def scrape(self, url):
         if not is_youtube(url):
             return
 
-        # FIXME: Sometimes youtube-dl takes a *long* time to run. This
-        # needs to give indication of progress.
-        try:
-            output = subprocess.check_output(
-                ['youtube-dl', '-j', url],
-                stderr=subprocess.STDOUT
-            )
-        except subprocess.CalledProcessError as cpe:
-            raise ScraperError('youtube-dl said "{0}".'.format(cpe.output))
-        except OSError:
-            raise ScraperError('youtube-dl not installed or not on PATH.')
-
-        # Each line is a single JSON object.
-        items = []
-        for line in output.splitlines():
-            items.append(json.loads(line))
-
-        items = [self.transform_item(item) for item in items]
+        # Keep surfacing extraction failures as ScraperError, the same
+        # exception type the subprocess-based code raised.
+        try:
+            with youtube_dl.YoutubeDL() as ydl:
+                result = ydl.extract_info(url, download=False)
+        except youtube_dl.utils.DownloadError as exc:
+            raise ScraperError('youtube-dl said "{0}".'.format(exc))
 
-        return items
+        # A result with an 'entries' key holds several items; else just one.
+        if 'entries' in result:
+            return [self.transform_item(item) for item in result['entries']]
+        else:
+            return [self.transform_item(result)]