From 6642c61ccac00e8255af906f45162b8d223e4ebf Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 13 Aug 2019 20:12:36 +0530
Subject: [PATCH 64/71] Update CNET News
---
recipes/cnetnews.recipe | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/recipes/cnetnews.recipe b/recipes/cnetnews.recipe
index 3c3ac55f44..a0f9607d12 100644
--- a/recipes/cnetnews.recipe
+++ b/recipes/cnetnews.recipe
@@ -18,6 +18,12 @@
from calibre.web.feeds.news import BasicNewsRecipe
+def classes(classes):  # matcher spec: elements carrying any of the space-separated class names
+    wanted = frozenset(classes.split(' '))  # the class names to look for
+    return {'attrs': {  # falsy class values (None, '') come back unchanged, i.e. no match
+        'class': lambda value: value and wanted.intersection(value.split())}}
+
+
class CnetNews(BasicNewsRecipe):
title = 'CNET News'
__author__ = 'Kovid Goyal'
@@ -44,10 +50,12 @@ class CnetNews(BasicNewsRecipe):
'data-component': 'imageGalleryModal'}),
dict(attrs={'data-component': 'sharebar'}),
dict(name=['link', 'meta']),
+ classes('playerControls video share-button'),
]
keep_only_tags = [
- dict(itemprop='headline'),
+ dict(name='h1'),
+ dict(section='author'),
dict(id=["article-body", 'cnetReview']),
dict(attrs={'class': 'deal-content'}),
]
@@ -89,4 +97,6 @@ def postprocess_html(self, soup, first_fetch):
h1.extract()
if first_fetch:
soup.find('body').insert(1, h1)
+ for img in soup.findAll('img'):
+ img['height'] = img['width'] = ''
return soup