Dieses kleine Skript schreibt Schlüsselwörter in eine Datei, fügt aber zwischen jedem Schlüsselwort einen zusätzlichen Zeilenumbruch hinzu. Wie kann ich es stoppen? I.e. stattEntfernen Sie zusätzliche Zeilenumbrüche beim Schreiben in eine Datei
Apple
Banana
Crayon
Ich möchte
Apple
Banana
Crayon
Ich habe versucht, "listwrite" googeln aber nicht helfen.
Ich bin sicher, das ist eine sehr einfache Sache, aber ich kann es nicht herausfinden.
#!/usr/local/bin/python
###################################################
# nerv3.py
# Goal: Named entity recognition script to pull names/place from text
# called as python nerv3.py text_path_or_file
#
# Inputs:
# path - text file or directory containing text files
# output - output file name
# uuid
# Outputs:
# Output file written
# People, Places, Others files
#
###################################################
#gonna need to install AlchemyAPI
import AlchemyAPI
import argparse
import xml.etree.ElementTree as ET
import collections
import codecs
import os
#from IPython import embed
#=================================================
def listwrite(output_file,thelist):
for item in thelist:
item.encode('utf-8')
output_file.write("%s\n\n" % item)
#=================================================
def main():
tmpdir = "/tmp/pagekicker"
#personal api key saved as api_key.txt
parser = argparse.ArgumentParser()
parser.add_argument('path', help = "target file or directory for NER")
parser.add_argument('output', help = "target file for output")
parser.add_argument('uuid', help = "uuid")
args = parser.parse_args()
in_file = args.path
out_file = args.output
uuid = args.uuid
folder = os.path.join(tmpdir, uuid)
print folder
cwd = os.getcwd()
apikey_location = os.path.join(cwd, "api_key.txt")
with open(in_file) as f:
text = f.read()
alchemyObj = AlchemyAPI.AlchemyAPI()
alchemyObj.loadAPIKey(apikey_location)
result = alchemyObj.TextGetRankedNamedEntities(text)
root = ET.fromstring(result)
place_list = ['City', 'Continent', 'Country', 'Facility', 'GeographicFeature',\
'Region', 'StateOrCounty']
People = {}
Places = {}
Other = {}
for entity in root.getiterator('entity'):
if entity[0].text == 'Person':
People[entity[3].text]=[entity[1].text, entity[2].text]
elif entity[0].text in place_list:
Places[entity[3].text] = [entity[1].text, entity[2].text]
else:
Other[entity[3].text] = [entity[1].text, entity[2].text]
#print lists ordered by relevance
Places_s = sorted(Places, key = Places.get, reverse = True)
People_s = sorted(People, key = People.get, reverse = True)
Other_s = sorted(Other, key = Other.get, reverse = True)
# here is where things seem to go awry
with codecs.open(out_file, mode = 'w', encoding='utf-8') as o:
listwrite(o, People_s)
listwrite(o, Places_s)
listwrite(o, Other_s)
out_file = os.path.join(folder, 'People')
with codecs.open(out_file, mode= 'w', encoding='utf-8') as o:
listwrite(o, People_s)
out_file = os.path.join(folder, 'Places')
with codecs.open(out_file, mode= 'w', encoding='utf-8') as o:
listwrite(o, Places_s)
out_file = os.path.join(folder, 'Other')
with codecs.open(out_file, mode= 'w', encoding='utf-8') as o:
listwrite(o, Other_s)
#=================================================
if __name__ == '__main__':
main()
ändern Sie "% s \ n \ n" 'in' "% s \ n" 'in Ihrer' listwrite' Funktion –