Ich habe alle in anderen Fragen aufgeführten Möglichkeiten ausprobiert, um diesen Code zu debuggen, konnte das Problem aber nicht beheben. Da ich ein Python-Anfänger bin, kann ich urllib2 nicht durch requests ersetzen; mit requests erhalte ich den Fehler KeyError: 'id'. Beim Ausführen tritt folgender Fehler auf: urllib2.HTTPError: HTTP Error 400: Bad Request. Bitte helfen Sie mir.
Python-Code:
import os
os.environ['http_proxy']=''
import urllib
import urllib2
import json
import mysql.connector
import datetime
from config import config
import requests
def connect_db():
    """Open and return a new MySQL connection using the settings in `config`."""
    return mysql.connector.connect(**config)
def create_post_url(graph_url, APP_ID, APP_SECRET):
    """Build an authenticated Graph API /posts/ URL for the given page URL."""
    # App access tokens have the form "<app id>|<app secret>".
    token = APP_ID + "|" + APP_SECRET
    return graph_url + "/posts/?key=value&access_token=" + token
def render_to_json(graph_url):
    """Fetch `graph_url` over HTTP and parse the response body as JSON."""
    request = urllib2.Request(graph_url)
    body = urllib2.urlopen(request).read()
    return json.loads(body)
def scrape_posts_by_date(graph_url, date, post_data, APP_ID, APP_SECRET):
    """Collect posts newer than `date` from the Graph API, following paging.

    Appends rows of [id, message, likes_count, created_time, shares_count]
    to `post_data` and returns it.  Recurses onto the next page until a post
    older than `date` is found.
    """
    page_posts = render_to_json(graph_url)
    next_page = page_posts["paging"]["next"]
    # Boolean to tell us when to stop collecting.
    collecting = True
    for post in page_posts["data"]:
        try:
            likes_count = get_likes_count(post["id"], APP_ID, APP_SECRET)
            # BUG FIX: likes_count was fetched but dropped from the row, so
            # rows had 4 fields while the post_info INSERT expects 5 (+page
            # key) and the date comparisons pointed at the wrong indices.
            current_post = [post["id"], post["message"], likes_count,
                            post["created_time"],
                            post["shares"]["count"]]
        except Exception:
            current_post = ["error", "error", "error", "error", "error"]
        if current_post[3] != "error":
            print(date)
            print(current_post[3])
            # Compare the ISO-8601 timestamps lexicographically: keep posts
            # created at or after `date`, stop once we see an older one.
            if date <= current_post[3]:
                post_data.append(current_post)
            elif date > current_post[3]:
                print("Done collecting")
                collecting = False
                break
    # If we still meet the date requirement, run on the next page.
    if collecting == True:
        scrape_posts_by_date(next_page, date, post_data, APP_ID, APP_SECRET)
    return post_data
def get_likes_count(post_id, APP_ID, APP_SECRET):
    """Return the total like count for `post_id` via the Graph API summary."""
    graph_url = "https://graph.facebook.com/"
    # BUG FIX: the '=' after 'access_token' was missing, so the token was
    # glued onto the parameter name and the Graph API answered with
    # "HTTP Error 400: Bad Request".
    likes_args = post_id + "/likes?summary=true&key=value&access_token=" + APP_ID + "|" + APP_SECRET
    likes_json = render_to_json(graph_url + likes_args)
    # Pick out the likes count from the summary object.
    return likes_json["summary"]["total_count"]
def create_comments_url(graph_url, post_id, APP_ID, APP_SECRET):
    """Build an authenticated Graph API comments URL for `post_id`."""
    # App access tokens have the form "<app id>|<app secret>".
    token = APP_ID + "|" + APP_SECRET
    return graph_url + post_id + "/comments/?key=value&access_token=" + token
def get_comments_data(comments_url, comment_data, post_id):
    """Collect all comments reachable from `comments_url` into `comment_data`.

    Each row is [comment_id, message, like_count, created_time, post_id].
    Follows "paging.next" links recursively and returns `comment_data`.
    """
    # BUG FIX: keep the whole response so "paging" stays reachable; the
    # original overwrote it with the "data" list, so the paging lookup below
    # always raised and pagination never happened.
    response = render_to_json(comments_url)
    for comment in response["data"]:
        try:
            current_comments = [comment["id"], comment["message"],
                                comment["like_count"],
                                comment["created_time"], post_id]
            print(current_comments)
            comment_data.append(current_comments)
        except Exception:
            # Skip comments that are missing expected fields.
            pass
    # Check if there is another page.
    try:
        next_page = response["paging"]["next"]
    except Exception:
        next_page = None
    # If we have another page, recurse.
    if next_page is not None:
        get_comments_data(next_page, comment_data, post_id)
    # BUG FIX: always return comment_data; the original only returned it on
    # the no-more-pages branch, so the recursive path yielded None.
    return comment_data
def main():
    """Crawl a list of Facebook pages and store page, post and comment data."""
    # Simple data pull App Secret and App ID.
    APP_SECRET = "app_secret"
    APP_ID = "app_id"
    # To find a username, go to the page's FB page; it is at the end of the
    # URL, e.g. http://facebook.com/walmart -> walmart.
    list_companies = ["walmart", "cisco", "pepsi", "facebook"]
    graph_url = "https://graph.facebook.com/"
    # The time of last week's crawl, as an ISO-8601 string (comparable
    # lexicographically with the Graph API's created_time values).
    last_crawl = datetime.datetime.now() - datetime.timedelta(weeks=1)
    last_crawl = last_crawl.isoformat()
    # Create db connection.
    connection = connect_db()
    cursor = connection.cursor()
    # SQL statement for adding Facebook page data to database.
    insert_info = ("INSERT INTO page_info "
                   "(fb_id, likes, talking_about, username)"
                   "VALUES (%s, %s, %s, %s)")
    # SQL statement for adding post data.
    insert_posts = ("INSERT INTO post_info "
                    "(fb_post_id, message, likes_count, time_created, shares, page_id)"
                    "VALUES (%s, %s, %s, %s, %s, %s)")
    # SQL statement for adding comment data.
    insert_comments = ("INSERT INTO comment_info "
                       "(comment_id, message, likes_count, time_created, post_id)"
                       "VALUES (%s, %s, %s, %s, %s)")
    for company in list_companies:
        # Open the public page in the Facebook Graph API.
        current_page = graph_url + company
        json_fbpage = render_to_json(current_page)
        # Gather our page-level JSON data.
        page_data = [json_fbpage["id"], json_fbpage["likes"],
                     json_fbpage["talking_about_count"],
                     json_fbpage["username"]]
        print(page_data)
        # Extract post data.
        post_url = create_post_url(current_page, APP_ID, APP_SECRET)
        # BUG FIX: scrape_posts_by_date takes five arguments; APP_ID and
        # APP_SECRET were missing here, which raised a TypeError.
        post_data = scrape_posts_by_date(post_url, last_crawl, [],
                                         APP_ID, APP_SECRET)
        print(post_data)
        # Insert the page data we pulled into the db and grab its primary key.
        cursor.execute(insert_info, page_data)
        last_key = cursor.lastrowid
        # Loop through and insert post data.
        for post in post_data:
            post.append(last_key)
            cursor.execute(insert_posts, post)
            # Capture the db id of the post just inserted.
            post_key = cursor.lastrowid
            print(post_key)
            comment_url = create_comments_url(graph_url, post[0], APP_ID, APP_SECRET)
            # BUG FIX: pass a fresh list per post; the original shared one
            # accumulator across posts and re-inserted earlier posts'
            # comments on every iteration.
            comments = get_comments_data(comment_url, [], post_key)
            # Insert comments.
            for comment in comments:
                cursor.execute(insert_comments, comment)
        # Commit this company's data to the db.
        connection.commit()
    connection.close()
if __name__ == "__main__":
    main()
Dies ist der Fehler, den ich erhalte:
Traceback (most recent call last):
File "script.py", line 210, in <module>
main()
File "script.py", line 164, in main
json_fbpage = render_to_json(current_page)
File "script.py", line 26, in render_to_json
response= urllib2.urlopen(web_response)
File "/usr/lib/python2.7/urllib2.py", line 127, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 410, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 523, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 448, in error
return self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 382, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 531, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 400: Bad Request
Bitte bearbeiten Sie Ihre Frage, um den vollständigen Fehler, den Sie erhalten, einzuschließen. – IanAuld
Sie haben zu viel Code in Ihrer Frage. Versuchen Sie, das Problem zu paaren. Und wie @IanAuld sagte, kopieren und fügen Sie Ihre vollständige Traceback, so dass wir helfen können. Anfragen ist wirklich besser als urllib, aber verwenden Sie, was Sie wünschen –
Ich habe den vollen Fehler hinzugefügt. –