Ich versuche, mich mit Scrapy bei Quora einzuloggen, aber es gelingt mir nicht: Je nach meinen Formulardaten antwortet der Server mit dem Statuscode 400 oder 500. (Titel: Mit Scrapy bei Quora einloggen)
Die folgenden Formulardaten habe ich in Chrome gefunden:
General
Request URL:https://www.quora.com/webnode2/server_call_POST?__instart__
Request Method:POST
Status Code:200
Remote Address:103.243.14.60:443
Form Data
json:{"args":[],"kwargs":{"email":"[email protected]","password":"XXXX","passwordless":1}}
formkey:750febacf08976a47c82f3e10af83305
postkey:dab46d0df2014d1568ead6b2fbad7297
window_id:dep3300-2420196009402604566
referring_controller:index
referring_action:index
_lm_transaction_id:0.2598935768985011
_lm_window_id:dep3300-2420196009402604566
__vcon_json:["Vn03YsuKFZvHV9"]
__vcon_method:do_login
__e2e_action_id:ee1qmp1iit
js_init:{}
Als Nächstes folgt mein Code-Beispiel; es ist ein gewöhnlicher Scrapy-Ablauf. Ich vermute, dass das Problem in den Formulardaten liegt. Kann mir jemand dabei helfen?
import json
import re

import scrapy
class QuestionsSpider(scrapy.Spider):
    """Spider that signs in to Quora through its AJAX login endpoint.

    Flow:
      1. ``start_requests`` fetches the home page (``domain``).
      2. ``start_login`` reads the ``formkey``, ``postkey`` and ``window_id``
         values out of that page and POSTs them, together with the
         credentials, to ``login_url``.
      3. ``after_login`` prints the server's reply.

    ``email`` and ``password`` are placeholders; override them on the class
    or pass them as spider arguments (``-a email=... -a password=...``).
    """

    name = 'questions'
    domain = 'https://www.quora.com'
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Language": "zh-Hans-CN,zh-Hans;q=0.8,en-US;q=0.5,en;q=0.3",
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/49.0.2623.108 Chrome/49.0.2623.108 Safari/537.36",
        "Accept-Encoding": "gzip, deflate",
        "Host": "www.quora.com",
        "Connection": "Keep-Alive",
        "content-type": "application/x-www-form-urlencoded",
    }

    # Placeholder credentials sent in the "json" form field.
    email = 'xxxx'
    password = 'xxx'

    # Endpoint used when no ``login_url`` argument is supplied.
    default_login_url = 'https://www.quora.com/webnode2/server_call_POST?__instart__'

    # One pattern per token that must be copied from the home page into the
    # login POST. Each captures the first double-quoted value after the key.
    # The optional quote after ``window_id`` lets the pattern work whether or
    # not the key itself is quoted in the page.
    # NOTE(review): the exact page markup is not visible here - confirm.
    _TOKEN_PATTERNS = {
        'formkey': re.compile(r'"formkey".*?"(.*?)"', re.S),
        'postkey': re.compile(r'"postkey".*?"(.*?)"', re.S),
        'window_id': re.compile(r'window_id"?.*?"(.*?)"', re.S),
    }

    def __init__(self, login_url=None, **kwargs):
        """Initialise the spider.

        Args:
            login_url: URL the login form is POSTed to. Falls back to
                ``default_login_url`` when omitted or empty.
            **kwargs: Forwarded to ``scrapy.Spider.__init__``.
        """
        # The base initialiser must run so Scrapy can set up the spider.
        super(QuestionsSpider, self).__init__(**kwargs)
        self.login_url = login_url or self.default_login_url

    @classmethod
    def _extract_tokens(cls, page_text):
        """Return a dict of the login tokens found in *page_text*.

        Raises:
            ValueError: if any of the expected tokens is missing.
        """
        tokens = {}
        for key, pattern in cls._TOKEN_PATTERNS.items():
            match = pattern.search(page_text)
            if match is None:
                raise ValueError('login token %r not found in page' % key)
            tokens[key] = match.group(1)
        return tokens

    def start_requests(self):
        """Request the home page; the tokens are parsed in ``start_login``."""
        yield scrapy.Request(
            url=self.domain,
            headers=self.headers,
            meta={'cookiejar': 1},
            callback=self.start_login,
        )

    def start_login(self, response):
        """Build and send the login POST from the home-page *response*."""
        try:
            tokens = self._extract_tokens(response.text)
        except ValueError as exc:
            # Without the tokens the POST cannot be built; stop here.
            self.logger.error('Cannot log in: %s', exc)
            return

        credentials = {
            'args': [],
            'kwargs': {'email': self.email, 'password': self.password},
        }
        formdata = {
            # Form values must be strings, so the nested structure is sent
            # as a JSON-encoded string (the shape the browser submits).
            'json': json.dumps(credentials, separators=(',', ':')),
            'referring_controller': 'index',
            'referring_action': 'index',
            '__vcon_method': 'do_login',
            '__e2e_action_id': 'ee1qmp1iit',
        }
        formdata.update(tokens)

        # A plain FormRequest is used instead of FormRequest.from_response:
        # the target is an AJAX endpoint, not an HTML <form> on the page.
        yield scrapy.FormRequest(
            url=self.login_url,
            meta={'cookiejar': response.meta['cookiejar']},
            headers=self.headers,
            formdata=formdata,
            callback=self.after_login,
        )

    def after_login(self, response):
        """Print the raw body of the login response for inspection."""
        print(response.body)
Können Sie die Protokolle (Logs) Ihres Spiders teilen? – eLRuLL