0 Daumen
120 Aufrufe

Ich habe versucht, die Cosinus-Ähnlichkeit zwischen zwei Spalten eines Datenrahmens mit spatial.distance.cosine zu erzeugen. Ich möchte mit diesen beiden Funktionen eine weitere Spalte erstellen:

    def cosine_sim(x):
        li = []
        for item in x["sent_emb"]:
            li.append(spatial.distance.cosine(item,x["quest_emb"][0]))
        return li
   
    def predictions(train):
       
        train["cosine_sim"] = train.apply(cosine_sim, axis = 1)


Die zwei Spalten sehen so aus:

  sent_emb                                         quest_emb
    0 [[0.030376578, 0.044331014, 0.081356354, 0.062... [[0.01491953, 0.021973763, 0.021364095, 0.0393...
    1 [[0.030376578, 0.044331014, 0.081356354, 0.062... [[0.04444952, 0.028005758, 0.030357722, 0.0375...
    2 [[0.030376578, 0.044331014, 0.081356354, 0.062... [[0.03949683, 0.04509903, 0.018089347, 0.07667...
      ...


Aber ich bekomme einen TypeError; anscheinend sind einige Werte vom Typ NoneType und float. Weißt du, wie ich solche Daten herausfiltern oder auf null (oder einen anderen Wert) setzen kann, damit sie mich nicht daran hindern, meine Funktionen zu verwenden?


  TypeError: ("unsupported operand type(s) for *: 'NoneType' and 'float'", 'occurred at index 473')
   
    ---------------------------------------------------------------------------
    TypeError                                Traceback (most recent call last)
    <ipython-input-23-af28fc11a9d3> in <module>()
    ----> 1 predicted = predictions(train)
   
    <ipython-input-22-1699cf33d87c> in predictions(train)
          1 def predictions(train):
          2
    ----> 3    train["cosine_sim"] = train.apply(cosine_sim, axis = 1)
          4    train["diff"] = (train["quest_emb"] - train["sent_emb"])**2
          5    train["euclidean_dis"] = train["diff"].apply(lambda x: list(np.sum(x, axis = 1)))
   
    ~/Documents/programming/mybot/mybotenv/lib/python3.5/site-packages/pandas/core/frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
      6012                          args=args,
      6013                          kwds=kwds)
    -> 6014        return op.get_result()
      6015
      6016    def applymap(self, func):
   
    ~/Documents/programming/mybot/mybotenv/lib/python3.5/site-packages/pandas/core/apply.py in get_result(self)
        140            return self.apply_raw()
        141
    --> 142        return self.apply_standard()
        143
        144    def apply_empty_result(self):
   
    ~/Documents/programming/mybot/mybotenv/lib/python3.5/site-packages/pandas/core/apply.py in apply_standard(self)
        246
        247        # compute the result using the series generator
    --> 248        self.apply_series_generator()
        249
        250        # wrap results
   
    ~/Documents/programming/mybot/mybotenv/lib/python3.5/site-packages/pandas/core/apply.py in apply_series_generator(self)
        275            try:
        276                for i, v in enumerate(series_gen):
    --> 277                    results[i] = self.f(v)
        278                    keys.append(v.name)
        279            except Exception as e:
   
    <ipython-input-20-276aa09bc25e> in cosine_sim(x)
          2    li = []
          3    for item in x["sent_emb"]:
    ----> 4        li.append(spatial.distance.cosine(item,x["quest_emb"][0]))
          5    return li
   
    ~/Documents/programming/mybot/mybotenv/lib/python3.5/site-packages/scipy/spatial/distance.py in cosine(u, v, w)
        742    # cosine distance is also referred to as 'uncentered correlation',
        743    #  or 'reflective correlation'
    --> 744    return correlation(u, v, w=w, centered=False)
        745
        746
   
    ~/Documents/programming/mybot/mybotenv/lib/python3.5/site-packages/scipy/spatial/distance.py in correlation(u, v, w, centered)
        693        u = u - umu
        694        v = v - vmu
    --> 695    uv = np.average(u * v, weights=w)
        696    uu = np.average(np.square(u), weights=w)
        697    vv = np.average(np.square(v), weights=w)
   
    TypeError: ("unsupported operand type(s) for *: 'NoneType' and 'float'", 'occurred at index 473')

von

Ein anderes Problem?

Stell deine Frage

Willkommen bei der Stacklounge! Stell deine Frage sofort und kostenfrei

x
Made by a lovely community