2016-08-03 19 views
-2

Zusätzliche Zeilenumbrüche beim Schreiben in eine Datei entfernen

Dieses kleine Skript schreibt Schlüsselwörter in eine Datei, fügt aber nach jedem Schlüsselwort einen zusätzlichen Zeilenumbruch ein. Wie kann ich das verhindern? D. h. statt

Apple 

Banana 

Crayon 

Ich möchte

Apple 
Banana 
Crayon 

Ich habe versucht, nach "listwrite" zu googeln, aber das hat nicht geholfen.

Ich bin sicher, das ist eine sehr einfache Sache, aber ich kann es nicht herausfinden.

#!/usr/local/bin/python 
################################################### 
# nerv3.py 
# Goal: Named entity recognition script to pull names/place from text 
# called as python nerv3.py path output uuid 
# 
# Inputs: 
# path - text file or directory containing text files 
# output - output file name 
# uuid 
# Outputs: 
# Output file written 
# People, Places, Others files 
# 
################################################### 

#gonna need to install AlchemyAPI 
import AlchemyAPI 
import argparse 
import xml.etree.ElementTree as ET 
import collections 
import codecs 
import os 
#from IPython import embed 
#================================================= 
def listwrite(output_file,thelist): 
    """Write every item of thelist to output_file, one after another.

    Each item is formatted with %s and followed by two newline characters,
    so an empty line appears after every item in the output.

    output_file -- object providing a write() method
    thelist     -- iterable of the items to write
    """
    for item in thelist: 
     # The encoded result is not stored or used, so this call does not
     # change what gets written below.
     item.encode('utf-8') 
     # Two newlines: the first ends the item's line, the second produces
     # the blank line after each item.
     output_file.write("%s\n\n" % item) 

#================================================= 

def main():
    """Extract named entities from a text file and write them to disk.

    Positional command-line arguments (parsed with argparse):
        path   -- input text; it is opened directly with open(), so it must
                  be a readable file
        output -- file that receives all names: people, then places, then
                  everything else
        uuid   -- sub-directory name under /tmp/pagekicker that receives the
                  per-category files 'People', 'Places' and 'Other'

    The AlchemyAPI key is loaded from 'api_key.txt' in the current working
    directory. The target directory is not created here; it is assumed to
    exist already.
    """
    tmpdir = "/tmp/pagekicker"

    #personal api key saved as api_key.txt
    parser = argparse.ArgumentParser()
    parser.add_argument('path', help = "target file or directory for NER")
    parser.add_argument('output', help = "target file for output")
    parser.add_argument('uuid', help = "uuid")
    args = parser.parse_args()

    folder = os.path.join(tmpdir, args.uuid)
    # The parenthesised form prints the same text on Python 2 and Python 3.
    print(folder)
    apikey_location = os.path.join(os.getcwd(), "api_key.txt")

    with open(args.path) as f:
        text = f.read()

    alchemyObj = AlchemyAPI.AlchemyAPI()
    alchemyObj.loadAPIKey(apikey_location)
    result = alchemyObj.TextGetRankedNamedEntities(text)
    root = ET.fromstring(result)

    place_types = ('City', 'Continent', 'Country', 'Facility',
                   'GeographicFeature', 'Region', 'StateOrCounty')
    people = {}
    places = {}
    other = {}

    # Element.iter replaces the deprecated getiterator (removed in Python 3.9).
    for entity in root.iter('entity'):
        # Children are addressed by position: [0] is compared against the
        # entity type names, [3] becomes the dictionary key.
        # NOTE(review): presumably type / relevance / count / text in the
        # AlchemyAPI XML response -- verify against the API documentation.
        kind = entity[0].text
        if kind == 'Person':
            bucket = people
        elif kind in place_types:
            bucket = places
        else:
            bucket = other
        bucket[entity[3].text] = [entity[1].text, entity[2].text]

    # Names ordered by their stored [entity[1], entity[2]] values, descending.
    # NOTE(review): the values are compared as text, not as numbers -- confirm
    # that this matches the intended relevance ordering.
    people_s = sorted(people, key=people.get, reverse=True)
    places_s = sorted(places, key=places.get, reverse=True)
    other_s = sorted(other, key=other.get, reverse=True)

    # Combined output: every category in one file. listwrite decides how each
    # name is terminated.
    with codecs.open(args.output, mode='w', encoding='utf-8') as o:
        for names in (people_s, places_s, other_s):
            listwrite(o, names)

    # One additional file per category inside the uuid folder.
    per_category = (('People', people_s),
                    ('Places', places_s),
                    ('Other', other_s))
    for filename, names in per_category:
        target = os.path.join(folder, filename)
        with codecs.open(target, mode='w', encoding='utf-8') as o:
            listwrite(o, names)
#================================================= 
# Run the extraction only when this file is executed as a script.
if __name__ == "__main__":
    main()
+0

Ändern Sie "%s\n\n" in "%s\n" in Ihrer `listwrite`-Funktion –

Antwort

2
def listwrite(output_file,thelist): 
    """Write every item of thelist to output_file, one after another.

    Each item is formatted with %s and followed by two newline characters,
    so an empty line appears after every item in the output.

    output_file -- object providing a write() method
    thelist     -- iterable of the items to write
    """
    for item in thelist: 
     # The encoded result is not stored or used, so this call does not
     # change what gets written below.
     item.encode('utf-8') 
     # Two newlines: the first ends the item's line, the second produces
     # the blank line after each item.
     output_file.write("%s\n\n" % item) 

Im Code ist listwrite als Funktion definiert. Für jedes item in thelist schreibt sie das item, gefolgt von zwei Newline-Zeichen. Um die zusätzliche Leerzeile zu entfernen, entfernen Sie einfach eines der beiden \n.

def listwrite(output_file,thelist):
    """Write each item of thelist to output_file on its own line.

    Every item is formatted with %s and terminated by a single newline, so
    no blank lines appear between items.

    output_file -- object providing a write() method; any encoding is left
                   to this object (for example a stream opened with
                   codecs.open(..., encoding='utf-8'))
    thelist     -- iterable of the items to write
    """
    # No item.encode('utf-8') call here: its return value would be thrown
    # away, so it could never influence the text that is written.
    for item in thelist:
        output_file.write("%s\n" % item)