Ich habe ein Programm, das auf k means-Algorithmus basiert. Wenn ich das Programm auf Python 2 ausführen, gibt es keine Probleme. Wenn ich es auf Python 3 zu verwenden, habe ich die folgenden Fehlermeldungen erhalten:Python 2 bis 3 Fehler. Typeerror: unorderable Typen: int() <= str()
Traceback (most recent call last):
File "kmeans.py", line 111, in <module>
main()
File "kmeans.py", line 13, in main
clusters = kmeans(points, num_cluster, cutoff)
File "kmeans.py", line 67, in kmeans
initial = random.sample(points, k) # generating k random points for initial centroids
File "/afs/cad/linux/anaconda3/anaconda/pkgs/python-3.5.2-1/lib/python3.5/random.py", line 314, in sample
if not 0 <= k <= n:
TypeError: unorderable types: int() <= str()
Dies ist der Code habe ich so weit, ich kann nicht scheinen, um herauszufinden, wie zu beheben.
import sys
import math
import random
def main():
points = readDataFile("datafile.txt")
print ('Original dataset: \n' + str(points) + '\n')
# generating clusters
# num_cluster = 2 # k for default testing value
num_cluster = input ('Please declare number of clusters k: ') #k
cutoff = 0.5 # iter cut off
clusters = kmeans(points, num_cluster, cutoff)
# Print our clusters
for i,c in enumerate(clusters):
print ("Cluster " + str(i) + "\t" + str(c))
# reading in data points from file
def readDataFile(filename):
points = []
lines = [line.rstrip('\n') for line in open(filename)]
for line in lines:
pieces = line.split(' ')
points.append(Point([int(pieces[0]), int(pieces[1])]))
return points
# point class to contain a set of 2d coordinates
class Point:
def __init__(self, coords):
self.coords = coords
self.n = len(coords)
def __repr__(self):
return str(self.coords)
# cluster class to define cluster functionality
class Cluster:
# constructor
def __init__(self, points):
self.points = points
self.n = points[0].n
self.centroid = self.calculateCentroid()
# to string method
def __repr__(self):
return str(self.points)
# updates the current loc
def update(self, points):
old_centroid = self.centroid
self.points = points
self.centroid = self.calculateCentroid()
shift = getDistance(old_centroid, self.centroid)
return shift
# calculates new location of centroid based on mean
def calculateCentroid(self):
numPoints = len(self.points) # finding center point
coords = [p.coords for p in self.points] # list of all coordinates in curr cluster
unzipped = zip(*coords)
centroid_coords = [math.fsum(dList)/numPoints for dList in unzipped] # mean for each point
return Point(centroid_coords)
# kmean algo to cluster data
def kmeans(points, k, cutoff):
initial = random.sample(points, k) # generating k random points for initial centroids
clusters = [Cluster([p]) for p in initial] # creating k clusters using generated centroids
loopCounter = 0 # looping thru data until the clusters stabilize
while True:
# list of lists to hold point objects
lists = [ [] for c in clusters]
clusterCount = len(clusters)
loopCounter += 1
for p in points:
# dist bw curr to first centroid
smallest_distance = getDistance(p, clusters[0].centroid)
clusterIndex = 0
for i in range(clusterCount - 1):
# calc point to point diff in distances
distance = getDistance(p, clusters[i+1].centroid)
# setting cluster index based on dist
if distance < smallest_distance:
smallest_distance = distance
clusterIndex = i+1
lists[clusterIndex].append(p) # appending point to cluster
biggest_shift = 0.0 # resetting biggest_shift to zero for curr iteration
for i in range(clusterCount):
# calc centroid movement dist
shift = clusters[i].update(lists[i])
# keeping track of the largest move from all cluster centroid updates
biggest_shift = max(biggest_shift, shift)
# checking if centroids movement is not vast (convergence)
if biggest_shift < cutoff:
break
return clusters
# generates euclidean distance between two points
def getDistance(a, b):
ret = reduce(lambda x,y: x + pow((a.coords[y]-b.coords[y]), 2),range(a.n),0.0)
return math.sqrt(ret)
# init
if __name__ == "__main__":
main()
Auch meine data.txt Datei sieht wie folgt aus:
0 0
0 1
1 0
10 10
10 11
11 10
11 11
Jede Hilfe wäre sehr dankbar.
'Eingang()' einen String zurückgibt, nicht eine ganze Zahl. –