0
Data Science Last Project: Data Science - Pandas Pandas Pandas
My solution passes 4 of the 5 test cases; can someone help me out? Assume that there are two clusters among the given two-dimensional data points and that two random points, (0, 0) and (2, 2), are the initial cluster centroids. Calculate the Euclidean distance between each data point and each of the centroids, assign each data point to its nearest centroid, then calculate the new centroids. If there's a tie, assign the data point to the cluster with centroid (0, 0). If none of the data points were assigned to a given centroid, return None for that centroid.
5 Answers
+ 2
#Thanks John Robotane
n = int(input())
import numpy as np
def eudt(lista, listb):
    """Return the Euclidean distance between two 2-D points.

    Only the first two coordinates of each point are used.
    """
    delta = np.array(lista) - np.array(listb)
    dx, dy = delta[0], delta[1]
    return (dx ** 2 + dy ** 2) ** 0.5
# Read the n data points, one "x y" pair per input line.
w = np.zeros((n, 2))
for i in range(n):
    w[i] = [float(j) for j in input().split()]

# Column 0: distance to centroid (0, 0); column 1: distance to centroid (2, 2).
distances = np.zeros((n, 2))
for i in range(n):
    distances[i] = [eudt(w[i], [0, 0]), eudt(w[i], [2, 2])]

# Build each membership mask once. Ties go to the (0, 0) cluster, hence <=.
in_team0 = distances[:, 0] <= distances[:, 1]
in_team2 = distances[:, 0] > distances[:, 1]
team0 = w[in_team0]
team2 = w[in_team2]

# Both results are always bound, so printing cannot hit an undefined name
# even when there are no points at all; an empty cluster yields None.
zeromn = np.around(team0.mean(axis=0), 2) if len(team0) else None
twoavg = np.around(team2.mean(axis=0), 2) if len(team2) else None

print(zeromn)
print(twoavg)
+ 2
Please like copy my answer
+ 1
import numpy as np
def distance(l1, l2):
    """Return the Euclidean distance between the 2-D points *l1* and *l2*.

    Each argument must unpack into exactly two coordinates.
    """
    (ax, ay), (bx, by) = l1, l2
    dx = ax - bx
    dy = ay - by
    return np.sqrt(dx ** 2 + dy ** 2)
def new_Centroid(pC):
    """Return the centroids of the first two clusters in *pC*.

    *pC* is a sequence of clusters; each cluster is a list of points whose
    first two items are the x and y coordinates.

    For each cluster the result is:
      * None if the cluster is empty or contains a None placeholder,
      * the point itself if the cluster holds exactly one point,
      * otherwise the [mean_x, mean_y] of its points.

    Returns a tuple (centroid_of_pC[0], centroid_of_pC[1]).
    """
    final_cen = []
    for cen in pC:
        if len(cen) == 0 or None in cen:
            # No points were assigned: there is no centroid to report.
            # Handling the empty list here avoids an IndexError on cen[0].
            final_cen.append(None)
        elif len(cen) == 1:
            # The mean of a single point is that point.
            final_cen.append(cen[0])
        else:
            count = len(cen)
            mean_x = sum(point[0] for point in cen) / count
            mean_y = sum(point[1] for point in cen) / count
            final_cen.append([mean_x, mean_y])
    return final_cen[0], final_cen[1]
# Initial cluster centroids.
centroids = [[0, 0], [2, 2]]

n = int(input())
# Read the n data points, one "x y" pair per input line.
nums = [[float(j) for j in input().split()] for _ in range(n)]

# output[k] holds the distances from point k to each initial centroid.
output = [[distance(centroid, no) for centroid in centroids] for no in nums]

pC1 = []
pC2 = []
for point, (d_first, d_second) in zip(nums, output):
    # Ties go to the first centroid, (0, 0).
    if d_first <= d_second:
        pC1.append(point)
    else:
        pC2.append(point)

# Mark every empty cluster with a None placeholder (checked independently,
# so two empty clusters are both marked).
for cluster in (pC1, pC2):
    if len(cluster) < 1:
        cluster.append(None)

# Round each centroid separately so that an empty cluster's None is printed
# as-is instead of being passed to np.round together with the other centroid.
ans = [
    None if cen is None else np.round(cen, 2)
    for cen in new_Centroid([pC1, pC2])
]
print(ans[0])
print(ans[1])
+ 1
Update:
I got it; here is the added part for anyone looking for an answer:
# Round each centroid on its own and keep the clusters in their original
# order: the first line printed is always the first cluster's centroid and
# the second line the second cluster's, with None for an empty cluster.
ans = [
    None if cen is None else np.round(cen, 2)
    for cen in new_Centroid([pC1, pC2])
]
print(ans[0])
print(ans[1])
+ 1
I don't know why my code passes only 4 out of 5 test cases. Can someone help me find the bug in my code? Thanks!
import numpy as np
import pandas as pd

n = int(input())

# Read the n data points, one "x y" pair per input line.
X = [[float(x) for x in input().split()] for _ in range(n)]

# The two initial centroids, for clusters 0 and 1.
Xc0 = np.array([0, 0])
Xc1 = np.array([2, 2])

area0 = []
area1 = []
for dps in X:
    # Euclidean distance is the square root of the SUM of squared
    # differences. Ties go to cluster 0, hence <=.
    dist0 = np.sqrt(((dps - Xc0) ** 2).sum())
    dist1 = np.sqrt(((dps - Xc1) ** 2).sum())
    if dist0 <= dist1:
        area0.append(dps)
    else:
        area1.append(dps)

# A cluster with no assigned points has no centroid: report None instead of
# taking the mean of an empty array (which yields nan).
cluster0 = np.array(area0).mean(axis=0).round(2) if area0 else None
cluster1 = np.array(area1).mean(axis=0).round(2) if area1 else None
print(cluster0)
print(cluster1)