Ich habe versucht, die Cosinus-Ähnlichkeit zwischen zwei Spalten eines Datenrahmens mit spatial.distance.cosine zu erzeugen. Ich möchte mit diesen beiden Funktionen eine weitere Spalte erstellen:
def cosine_sim(x):
li = []
for item in x["sent_emb"]:
li.append(spatial.distance.cosine(item,x["quest_emb"][0]))
return li
def predictions(train):
train["cosine_sim"] = train.apply(cosine_sim, axis = 1)
Die zwei Spalten sehen so aus:
sent_emb quest_emb
0 [[0.030376578, 0.044331014, 0.081356354, 0.062... [[0.01491953, 0.021973763, 0.021364095, 0.0393...
1 [[0.030376578, 0.044331014, 0.081356354, 0.062... [[0.04444952, 0.028005758, 0.030357722, 0.0375...
2 [[0.030376578, 0.044331014, 0.081356354, 0.062... [[0.03949683, 0.04509903, 0.018089347, 0.07667...
...
Aber ich habe einen TypeError, anscheinend sind einige Werte NoneType und float. Weißt du, wie ich die Daten dieser Art filtern kann, um sie auf zro oder zwei etwas zu setzen, das mich nicht daran hindert, meine zu verwenden
TypeError: ("unsupported operand type(s) for *: 'NoneType' and 'float'", 'occurred at index 473')
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-23-af28fc11a9d3> in <module>()
----> 1 predicted = predictions(train)
<ipython-input-22-1699cf33d87c> in predictions(train)
1 def predictions(train):
2
----> 3 train["cosine_sim"] = train.apply(cosine_sim, axis = 1)
4 train["diff"] = (train["quest_emb"] - train["sent_emb"])**2
5 train["euclidean_dis"] = train["diff"].apply(lambda x: list(np.sum(x, axis = 1)))
~/Documents/programming/mybot/mybotenv/lib/python3.5/site-packages/pandas/core/frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
6012 args=args,
6013 kwds=kwds)
-> 6014 return op.get_result()
6015
6016 def applymap(self, func):
~/Documents/programming/mybot/mybotenv/lib/python3.5/site-packages/pandas/core/apply.py in get_result(self)
140 return self.apply_raw()
141
--> 142 return self.apply_standard()
143
144 def apply_empty_result(self):
~/Documents/programming/mybot/mybotenv/lib/python3.5/site-packages/pandas/core/apply.py in apply_standard(self)
246
247 # compute the result using the series generator
--> 248 self.apply_series_generator()
249
250 # wrap results
~/Documents/programming/mybot/mybotenv/lib/python3.5/site-packages/pandas/core/apply.py in apply_series_generator(self)
275 try:
276 for i, v in enumerate(series_gen):
--> 277 results[i] = self.f(v)
278 keys.append(v.name)
279 except Exception as e:
<ipython-input-20-276aa09bc25e> in cosine_sim(x)
2 li = []
3 for item in x["sent_emb"]:
----> 4 li.append(spatial.distance.cosine(item,x["quest_emb"][0]))
5 return li
~/Documents/programming/mybot/mybotenv/lib/python3.5/site-packages/scipy/spatial/distance.py in cosine(u, v, w)
742 # cosine distance is also referred to as 'uncentered correlation',
743 # or 'reflective correlation'
--> 744 return correlation(u, v, w=w, centered=False)
745
746
~/Documents/programming/mybot/mybotenv/lib/python3.5/site-packages/scipy/spatial/distance.py in correlation(u, v, w, centered)
693 u = u - umu
694 v = v - vmu
--> 695 uv = np.average(u * v, weights=w)
696 uu = np.average(np.square(u), weights=w)
697 vv = np.average(np.square(v), weights=w)
TypeError: ("unsupported operand type(s) for *: 'NoneType' and 'float'", 'occurred at index 473')