|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
From 39c86f23d401f9d7329d94fcbf32b51cbc003b8c Mon Sep 17 00:00:00 2001
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
From: Kovid Goyal <kovid@kovidgoyal.net>
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
Date: Sat, 20 Jul 2019 12:40:26 +0530
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
Subject: [PATCH 03/71] Update WSJ
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
Fixes #1837213 [Private bug](https://bugs.launchpad.net/calibre/+bug/1837213)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
---
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
recipes/wsj.recipe | 98 +++++++++++++++++++++++------------------
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
recipes/wsj_free.recipe | 98 +++++++++++++++++++++++------------------
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
2 files changed, 110 insertions(+), 86 deletions(-)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
diff --git a/recipes/wsj.recipe b/recipes/wsj.recipe
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
index da28f081b3..f40f3fedfe 100644
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
--- a/recipes/wsj.recipe
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+++ b/recipes/wsj.recipe
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
@@ -5,10 +5,7 @@
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
import json
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
-try:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- from urllib.parse import quote
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
-except ImportError:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- from urllib import quote
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+from base64 import standard_b64encode
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
from mechanize import Request
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
@@ -16,6 +13,16 @@
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
from calibre.web.feeds.news import BasicNewsRecipe
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
from css_selectors import Select
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+try:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ import urllib.parse as urlparse
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+except ImportError:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ import urlparse
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+try:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ from urllib.parse import quote
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+except ImportError:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ from urllib import quote
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
needs_subscription = True
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
@@ -40,7 +47,7 @@ class WSJ(BasicNewsRecipe):
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
ignore_duplicate_articles = {'url'}
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
remove_attributes = ['style', 'data-scrim']
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
needs_subscription = needs_subscription
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- WSJ_ITP = 'https://online.wsj.com/itp/today'
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ WSJ_ITP = 'https://www.wsj.com/print-edition/today'
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
keep_only_tags = [
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
dict(classes('wsj-article-headline-wrap article_header bigTop__hed bigTop__dek bigTop__captioncredit')),
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
@@ -87,51 +94,56 @@ def get_cover_url(self):
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
# login {{{
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
if needs_subscription:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
def get_browser(self, *a, **kw):
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- # To understand the signin logic read signin.js from
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- # https://id.wsj.com/access/pages/wsj/us/signin.html
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- # This is the same login servie as used by Barrons
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ # To understand the login logic read app-min.js from
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ # https://sso.accounts.dowjones.com/login
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ itp = quote(self.WSJ_ITP, safe='')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ start_url = 'https://accounts.wsj.com/login?target=' + itp
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
kw['user_agent'] = random_user_agent(allow_ie=False)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
br = BasicNewsRecipe.get_browser(self, *a, **kw)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- # self.wsj_itp_page = open('/t/raw.html').read()
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- # return br
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- url = 'https://id.wsj.com/access/pages/wsj/us/signin.html?mg=com-wsj&mg=id-wsj'
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- # br.set_debug_http(True)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- br.open(url).read()
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- rurl = 'https://id.wsj.com/auth/submitlogin.json'
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- rq = Request(rurl, headers={
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'Accept': 'application/json, text/javascript, */*; q=0.01',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ self.log('Starting login process...')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ res = br.open(start_url)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ sso_url = res.geturl()
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ query = urlparse.parse_qs(urlparse.urlparse(sso_url).query)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ query = {k:v[0] for k, v in query.items()}
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ request_query = {
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ 'username': self.username,
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ 'password': self.password,
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ 'client_id': query['client'],
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ 'sso': 'true',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ 'tenant': 'sso',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ '_intstate': 'deprecated',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ }
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ for k in 'scope connection nonce state ui_locales ns protocol redirect_uri'.split():
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ request_query[k] = query[k]
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ login_url = 'https://sso.accounts.dowjones.com/usernamepassword/login'
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ # you can get the version below from lib-min.js
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ # search for: str: "x.x.x"
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ # This might need to be updated in the future
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ auth0_client = json.dumps({"name": "auth0.js", "version": "7.0.3"})
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ if not isinstance(auth0_client, bytes):
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ auth0_client = auth0_client.encode('utf-8')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ auth0_client = standard_b64encode(auth0_client)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ if isinstance(auth0_client, bytes):
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ auth0_client = auth0_client.decode('ascii')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ rq = Request(login_url, headers={
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ 'Accept': 'text/html',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
'Accept-Language': 'en-US,en;q=0.8',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'Content-Type': 'application/json',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'Referer': url,
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ 'Auth0-Client': auth0_client.rstrip('='),
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
'X-HTTP-Method-Override': 'POST',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
'X-Requested-With': 'XMLHttpRequest',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- }, data=json.dumps({
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'username': self.username,
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'password': self.password,
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'realm': 'default',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'savelogin': 'true',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'template': 'default',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'url': quote(self.WSJ_ITP),
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- }))
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- r = br.open(rq)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- if r.code != 200:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- raise ValueError('Failed to login, check username and password')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- data = json.loads(r.read())
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- # print(data)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- if data.get('result') != 'success':
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- raise ValueError(
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'Failed to login (XHR failed), check username and password')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- br.set_cookie('m', data['username'], '.wsj.com')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- try:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- r = br.open(data['url'])
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- except Exception:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- self.log.error('Failed to open login url: {}'.format(data['url']))
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- raise
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- self.wsj_itp_page = raw = r.read()
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ 'X-Remote-User': self.username
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ }, data=request_query)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ self.log('Sending login request...')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ res = br.open(rq)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ if res.code != 200:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ raise ValueError('Failed to login, check your username and password')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ br.select_form(nr=0)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ self.log('Performing login callback...')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ res = br.submit()
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ self.wsj_itp_page = raw = res.read()
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
if b'>Sign Out<' not in raw:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
raise ValueError(
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'Failed to login (auth URL failed), check username and password')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- # open('/t/raw.html', 'w').write(raw)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ 'Failed to login (callback URL failed), check username and password')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
return br
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
else:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
def get_browser(self, *a, **kw):
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
diff --git a/recipes/wsj_free.recipe b/recipes/wsj_free.recipe
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
index e04e210114..25726c0ca3 100644
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
--- a/recipes/wsj_free.recipe
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+++ b/recipes/wsj_free.recipe
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
@@ -5,10 +5,7 @@
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
import json
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
-try:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- from urllib.parse import quote
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
-except ImportError:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- from urllib import quote
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+from base64 import standard_b64encode
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
from mechanize import Request
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
@@ -16,6 +13,16 @@
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
from calibre.web.feeds.news import BasicNewsRecipe
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
from css_selectors import Select
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+try:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ import urllib.parse as urlparse
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+except ImportError:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ import urlparse
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+try:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ from urllib.parse import quote
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+except ImportError:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ from urllib import quote
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
needs_subscription = False
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
@@ -40,7 +47,7 @@ class WSJ(BasicNewsRecipe):
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
ignore_duplicate_articles = {'url'}
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
remove_attributes = ['style', 'data-scrim']
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
needs_subscription = needs_subscription
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- WSJ_ITP = 'https://online.wsj.com/itp/today'
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ WSJ_ITP = 'https://www.wsj.com/print-edition/today'
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
keep_only_tags = [
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
dict(classes('wsj-article-headline-wrap article_header bigTop__hed bigTop__dek bigTop__captioncredit')),
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
@@ -87,51 +94,56 @@ def get_cover_url(self):
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
# login {{{
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
if needs_subscription:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
def get_browser(self, *a, **kw):
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- # To understand the signin logic read signin.js from
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- # https://id.wsj.com/access/pages/wsj/us/signin.html
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- # This is the same login servie as used by Barrons
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ # To understand the login logic read app-min.js from
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ # https://sso.accounts.dowjones.com/login
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ itp = quote(self.WSJ_ITP, safe='')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ start_url = 'https://accounts.wsj.com/login?target=' + itp
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
kw['user_agent'] = random_user_agent(allow_ie=False)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
br = BasicNewsRecipe.get_browser(self, *a, **kw)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- # self.wsj_itp_page = open('/t/raw.html').read()
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- # return br
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- url = 'https://id.wsj.com/access/pages/wsj/us/signin.html?mg=com-wsj&mg=id-wsj'
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- # br.set_debug_http(True)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- br.open(url).read()
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- rurl = 'https://id.wsj.com/auth/submitlogin.json'
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- rq = Request(rurl, headers={
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'Accept': 'application/json, text/javascript, */*; q=0.01',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ self.log('Starting login process...')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ res = br.open(start_url)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ sso_url = res.geturl()
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ query = urlparse.parse_qs(urlparse.urlparse(sso_url).query)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ query = {k:v[0] for k, v in query.items()}
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ request_query = {
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ 'username': self.username,
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ 'password': self.password,
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ 'client_id': query['client'],
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ 'sso': 'true',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ 'tenant': 'sso',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ '_intstate': 'deprecated',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ }
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ for k in 'scope connection nonce state ui_locales ns protocol redirect_uri'.split():
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ request_query[k] = query[k]
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ login_url = 'https://sso.accounts.dowjones.com/usernamepassword/login'
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ # you can get the version below from lib-min.js
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ # search for: str: "x.x.x"
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ # This might need to be updated in the future
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ auth0_client = json.dumps({"name": "auth0.js", "version": "7.0.3"})
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ if not isinstance(auth0_client, bytes):
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ auth0_client = auth0_client.encode('utf-8')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ auth0_client = standard_b64encode(auth0_client)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ if isinstance(auth0_client, bytes):
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ auth0_client = auth0_client.decode('ascii')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ rq = Request(login_url, headers={
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ 'Accept': 'text/html',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
'Accept-Language': 'en-US,en;q=0.8',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'Content-Type': 'application/json',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'Referer': url,
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ 'Auth0-Client': auth0_client.rstrip('='),
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
'X-HTTP-Method-Override': 'POST',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
'X-Requested-With': 'XMLHttpRequest',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- }, data=json.dumps({
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'username': self.username,
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'password': self.password,
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'realm': 'default',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'savelogin': 'true',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'template': 'default',
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'url': quote(self.WSJ_ITP),
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- }))
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- r = br.open(rq)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- if r.code != 200:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- raise ValueError('Failed to login, check username and password')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- data = json.loads(r.read())
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- # print(data)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- if data.get('result') != 'success':
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- raise ValueError(
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'Failed to login (XHR failed), check username and password')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- br.set_cookie('m', data['username'], '.wsj.com')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- try:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- r = br.open(data['url'])
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- except Exception:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- self.log.error('Failed to open login url: {}'.format(data['url']))
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- raise
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- self.wsj_itp_page = raw = r.read()
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ 'X-Remote-User': self.username
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ }, data=request_query)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ self.log('Sending login request...')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ res = br.open(rq)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ if res.code != 200:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ raise ValueError('Failed to login, check your username and password')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ br.select_form(nr=0)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ self.log('Performing login callback...')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ res = br.submit()
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ self.wsj_itp_page = raw = res.read()
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
if b'>Sign Out<' not in raw:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
raise ValueError(
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- 'Failed to login (auth URL failed), check username and password')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
- # open('/t/raw.html', 'w').write(raw)
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
+ 'Failed to login (callback URL failed), check username and password')
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
return br
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
else:
|
|
![](https://seccdn.libravatar.org/avatar/74d2a4e8b5849d63c6838ef3fb9772b27447d57f0ac32af5cd03096343616233?s=16&d=retro) |
5f720ea |
def get_browser(self, *a, **kw):
|