2016-04-09

Encountering urllib2.HTTPError: HTTP Error 400: Bad Request

I have tried every approach listed in other questions to debug this code, but I have not been able to fix it. Since I am a beginner in Python, I cannot rewrite the urllib2 calls with requests; when I tried requests, I ran into KeyError: 'id' does not exist. Please help as soon as possible.

Python code:

import os 
os.environ['http_proxy']='' 
import urllib 
import urllib2 
import json 
import mysql.connector 
import datetime 
from config import config 
import requests 

def connect_db(): 
    connection = mysql.connector.connect(**config) 
    return connection 


def create_post_url(graph_url, APP_ID, APP_SECRET): 
    #create authenticated post URL 
    post_args = "/posts/?key=value&access_token=" + APP_ID + "|" + APP_SECRET 
    post_url = graph_url + post_args 

    return post_url 

def render_to_json(graph_url): 
    #render graph url call to JSON 
    web_response = urllib2.Request(graph_url) 
    response= urllib2.urlopen(web_response) 
    readable_data= response.read() 
    json_data = json.loads(readable_data) 

    return json_data 

def scrape_posts_by_date(graph_url, date, post_data, APP_ID, APP_SECRET): 
    #render URL to JSON 
    page_posts = render_to_json(graph_url) 

    #extract next page 
    next_page = page_posts["paging"]["next"] 

    #grab all posts 
    page_posts = page_posts["data"] 

    #boolean to tell us when to stop collecting 
    collecting = True 

    #for each post capture data
    for post in page_posts:
        try:
            likes_count = get_likes_count(post["id"], APP_ID, APP_SECRET)
            current_post = [post["id"], post["message"],
                            post["created_time"],
                            post["shares"]["count"]]

        except Exception:
            current_post = ["error", "error", "error", "error"]

        if current_post[2] != "error":
            print date
            print current_post[3]
            #compare dates
            if date <= current_post[3]:
                post_data.append(current_post)

            elif date > current_post[2]:
                print "Done collecting"
                collecting = False
                break


    #If we still don't meet date requirements, run on next page   
    if collecting == True:
        scrape_posts_by_date(next_page, date, post_data, APP_ID, APP_SECRET)

    return post_data 

def get_likes_count(post_id, APP_ID, APP_SECRET): 
    #create Graph API Call 
    graph_url = "https://graph.facebook.com/" 
    likes_args = post_id + "/likes?summary=true&key=value&access_token" + APP_ID + "|" + APP_SECRET 
    likes_url = graph_url + likes_args 
    likes_json = render_to_json(likes_url) 

    #pick out the likes count 
    count_likes = likes_json["summary"]["total_count"] 

    return count_likes 

def create_comments_url(graph_url, post_id, APP_ID, APP_SECRET): 
    #create Graph API Call 
    comments_args = post_id + "/comments/?key=value&access_token=" + APP_ID + "|" + APP_SECRET 
    comments_url = graph_url + comments_args 

    return comments_url 

def get_comments_data(comments_url, comment_data, post_id): 
    #render URL to JSON 
    comments = render_to_json(comments_url)["data"] 

    #for each comment capture data 
    for comment in comments:
        try:
            current_comments = [comment["id"], comment["message"], comment["like_count"],
                                comment["created_time"], post_id]
            print current_comments
            comment_data.append(current_comments)

        except Exception:
            current_comments = ["error", "error", "error", "error", "error"]


    #check if there is another page 
    try:
        #extract next page
        next_page = comments["paging"]["next"]
    except Exception:
        next_page = None

    #if we have another page, recurse
    if next_page is not None:
        get_comments_data(next_page, comment_data, post_id)
    else:
        return comment_data

def main(): 
    #simple data pull App Secret and App ID 
    APP_SECRET = "app_secret" 
    APP_ID = "app_id" 

    #to find go to page's FB page, at the end of URL find username 
    #e.g. http://facebook.com/walmart, walmart is the username 
    list_companies = ["walmart", "cisco", "pepsi", "facebook"] 
    graph_url = "https://graph.facebook.com/" 

    #the time of last weeks crawl 
    last_crawl = datetime.datetime.now() - datetime.timedelta(weeks=1) 
    last_crawl = last_crawl.isoformat() 

    #create db connection 
    connection = connect_db() 
    cursor = connection.cursor() 

    #SQL statement for adding Facebook page data to database 
    insert_info = ("INSERT INTO page_info " 
        "(fb_id, likes, talking_about, username)" 
        "VALUES (%s, %s, %s, %s)") 

    #SQL statement for adding post data    
    insert_posts = ("INSERT INTO post_info " 
        "(fb_post_id, message, likes_count, time_created, shares, page_id)" 
        "VALUES (%s, %s, %s, %s, %s, %s)") 

    #SQL statement for adding comment data 
    insert_comments = ("INSERT INTO comment_info " 
         "(comment_id, message, likes_count, time_created, post_id)" 
         "VALUES (%s, %s, %s, %s, %s)") 

    for company in list_companies:
        #make graph api url with company username
        current_page = graph_url + company

        #open public page in facebook graph api
        json_fbpage = render_to_json(current_page)

        #gather our page level JSON Data
        page_data = [json_fbpage["id"], json_fbpage["likes"],
                     json_fbpage["talking_about_count"],
                     json_fbpage["username"]]
        print page_data

        #extract post data
        post_url = create_post_url(current_page, APP_ID, APP_SECRET)
        post_data = []
        post_data = scrape_posts_by_date(post_url, last_crawl, post_data)

        print post_data

        #insert the data we pulled into db
        cursor.execute(insert_info, page_data)

        #grab primary key
        last_key = cursor.lastrowid

        comment_data = []

        #loop through and insert data
        for post in post_data:
            post.append(last_key)
            cursor.execute(insert_posts, post)

            #capture post id of data just inserted
            post_key = cursor.lastrowid
            print post_key
            comment_url = create_comments_url(graph_url, post[0], APP_ID, APP_SECRET)
            comments = get_comments_data(comment_url, comment_data, post_key)

            #insert comments
            for comment in comments:
                cursor.execute(insert_comments, comment)

        #commit the data to the db
        connection.commit()

    connection.close() 


if __name__ == "__main__": 
    main()  

This is the error I am getting:

Traceback (most recent call last):
  File "script.py", line 210, in <module>
    main()
  File "script.py", line 164, in main
    json_fbpage = render_to_json(current_page)
  File "script.py", line 26, in render_to_json
    response= urllib2.urlopen(web_response)
  File "/usr/lib/python2.7/urllib2.py", line 127, in urlopen
    return _opener.open(url, data, timeout)
  File "/usr/lib/python2.7/urllib2.py", line 410, in open
    response = meth(req, response)
  File "/usr/lib/python2.7/urllib2.py", line 523, in http_response
    'http', request, response, code, msg, hdrs)
  File "/usr/lib/python2.7/urllib2.py", line 448, in error
    return self._call_chain(*args)
  File "/usr/lib/python2.7/urllib2.py", line 382, in _call_chain
    result = func(*args)
  File "/usr/lib/python2.7/urllib2.py", line 531, in http_error_default
    raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 400: Bad Request

Please edit your question to include the full error you are getting. – IanAuld


You have too much code in your question. Try to pare the problem down. And as @IanAuld said, copy and paste your full traceback so we can help. requests really is better than urllib, but use whichever you prefer –


I have added the full error. –

Answer


The error is caused by the page-information URL missing your access token. If the access token is not present when you call the Graph API for the page information, it throws exactly this error.

You need to change the request made with the current_page variable so that the access token is added to the URL.
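
A minimal sketch of the fix in main(), reusing the APP_ID + "|" + APP_SECRET app access token the script already builds for its other URLs (names taken from the question's code; this assumes an app token is sufficient for the page fields being read):

token_arg = "?access_token=" + APP_ID + "|" + APP_SECRET

#fetch the page info with the token appended; current_page itself stays
#token-free because create_post_url() appends its own query string to it
current_page = graph_url + company
json_fbpage = render_to_json(current_page + token_arg)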
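
Once the 400 on the page request is gone, three more problems visible in the posted code will surface: the likes URL in get_likes_count() is missing the "=" after access_token, so that request is malformed too; the call to scrape_posts_by_date() in main() omits the APP_ID and APP_SECRET arguments the function signature requires; and the date comparison inside scrape_posts_by_date() tests current_post[3] (the share count) where it presumably means current_post[2] (the created_time). The corrected lines would look like this:

likes_args = post_id + "/likes?summary=true&key=value&access_token=" + APP_ID + "|" + APP_SECRET

post_data = scrape_posts_by_date(post_url, last_crawl, post_data, APP_ID, APP_SECRET)

if date <= current_post[2]:
    post_data.append(current_post)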