Py学习  »  Python

我们如何在Python中创建来自不同数据帧的多个样本的平均值?

E_Sarousi • 3 年前 • 1308 次点击  

下午好

我妻子怀了一个女孩,目前胎龄大约20周。我一直在尝试编写一个程序:读取我妻子和我在 Ancestry.com 上的 DNA 结果,进行大量抽样(以防止异常值),然后为我们的女儿抽样(每边50个),本质上是想预测我们的哪些基因会遗传给她。

这是我的代码。到这里为止,这些代码都不会返回任何错误:

# Region labels, each repeated by its weight; the weights add up to 100 entries.
M = (
    ["England & Northwestern Europe"] * 53
    + ["Ireland"] * 21
    + ["Scotland"] * 21
    + ["Wales"] * 5
)
len(M)

输出:100

# Region labels, each repeated by its weight; the weights add up to 100 entries.
E = (
    ["European Jewish"] * 53
    + ["Southern Italy"] * 31
    + ["Levant"] * 8
    + ["Northern Africa"] * 3
    + ["Aegean Islands"] * 2
    + ["Cyprus"] * 2
    + ["Arabian Peninsula"] * 1
)
len(E)

输出:100

# Wrap each label list in a DataFrame so that DataFrame.sample can be called on it.
E_df = pd.DataFrame(data=E)
M_df = pd.DataFrame(data=M)

这是我遇到麻烦的单元格:

def sample(df1, df2, num): # df1 and df2 are the two DataFrames to draw from; num is used both as the loop count and as the rows per draw (n=num).
    df1_list = [] 
    df2_list = [] # The per-iteration columns read back from df1 and df2 are collected in these two lists.
    combined_list = []
    for x in range(1, num+1):
        df1[x] = df1.sample(n=num) # Writes the draw back into df1 as a new column x, so df1 itself gains a column on each pass.
        df2[x] = df1.sample(n=num) # NOTE(review): draws from df1, not df2 - probably a typo for df2.sample. The traceback shown after this cell points at this line; by now df1 holds two columns, which presumably is why assigning the draw to the single column df2[x] fails with "Wrong number of items passed 2, placement implies 1".
        df1_list.append(df1[x])
        df2_list.append(df2[x]) # Intended to keep each draw as its own numbered sample.
        combined_list = df1_list + df2_list # Rebuilt on every iteration from everything collected so far.
        Baby = pd.concat(combined_list) # Concatenates the collected pieces on every iteration; no averaging is performed here, and Baby is never returned from the function.
   
sample(E_df, M_df, 50)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\indexes\range.py in get_loc(self, key, method, tolerance)
    350                 try:
--> 351                     return self._range.index(new_key)
    352                 except ValueError as err:

ValueError: 1 is not in range

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\generic.py in _set_item(self, key, value)
   3825         try:
-> 3826             loc = self._info_axis.get_loc(key)
   3827         except KeyError:

~\anaconda3\lib\site-packages\pandas\core\indexes\range.py in get_loc(self, key, method, tolerance)
    352                 except ValueError as err:
--> 353                     raise KeyError(key) from err
    354             raise KeyError(key)

KeyError: 1

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-100-ed9a70bd28b1> in <module>
     11         Baby = pd.concat(combined_list)
     12 
---> 13 sample(E_df, M_df, 50)

<ipython-input-100-ed9a70bd28b1> in sample(df1, df2, num)
      5     for x in range(1, num+1):
      6         df1[x] = df1.sample(n=num)
----> 7         df2[x] = df1.sample(n=num)
      8         df1_list.append(df1[x])
      9         df2_list.append(df2[x])

~\anaconda3\lib\site-packages\pandas\core\frame.py in __setitem__(self, key, value)
   3161         else:
   3162             # set column
-> 3163             self._set_item(key, value)
   3164 
   3165     def _setitem_slice(self, key: slice, value):

~\anaconda3\lib\site-packages\pandas\core\frame.py in _set_item(self, key, value)
   3241         self._ensure_valid_index(value)
   3242         value = self._sanitize_column(key, value)
-> 3243         NDFrame._set_item(self, key, value)
   3244 
   3245         # check if we are modifying a copy

~\anaconda3\lib\site-packages\pandas\core\generic.py in _set_item(self, key, value)
   3827         except KeyError:
   3828             # This item wasn't present, just insert at end
-> 3829             self._mgr.insert(len(self._info_axis), key, value)
   3830             return
   3831 

~\anaconda3\lib\site-packages\pandas\core\internals\managers.py in insert(self, loc, item, value, allow_duplicates)
   1201             value = safe_reshape(value, (1,) + value.shape)
   1202 
-> 1203         block = make_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1))
   1204 
   1205         for blkno, count in _fast_count_smallints(self.blknos[loc:]):

~\anaconda3\lib\site-packages\pandas\core\internals\blocks.py in make_block(values, placement, klass, ndim, dtype)
   2740         values = DatetimeArray._simple_new(values, dtype=dtype)
   2741 
-> 2742     return klass(values, ndim=ndim, placement=placement)
   2743 
   2744 

~\anaconda3\lib\site-packages\pandas\core\internals\blocks.py in __init__(self, values, placement, ndim)
    140 
    141         if self._validate_ndim and self.ndim and len(self.mgr_locs) != len(self.values):
--> 142             raise ValueError(
    143                 f"Wrong number of items passed {len(self.values)}, "
    144                 f"placement implies {len(self.mgr_locs)}"

ValueError: Wrong number of items passed 2, placement implies 1

任何形式的帮助都将不胜感激。非常感谢。

Python社区是高质量的Python/Django开发社区
本文地址:http://www.python88.com/topic/132560
 
1308 次点击  
文章 [ 1 ]  |  最新文章 3 年前
Corralien
Reply   •   1 楼
Corralien    3 年前

尝试:

def sample(df1, df2, num):
    """Draw ``num`` random samples of ``num`` rows from each frame and stack them.

    Args:
        df1: First DataFrame to sample from; it is only read, never assigned to.
        df2: Second DataFrame to sample from; it is only read, never assigned to.
        num: Used both as the number of draws per frame and as the number of
            rows taken in each draw.

    Returns:
        One DataFrame holding every draw from ``df1`` followed by every draw
        from ``df2``, concatenated row-wise.
    """
    # Draw from df1 and df2 alternately so the random generator is consumed
    # in the same order as a plain loop doing one draw of each per pass.
    draws = [(df1.sample(n=num), df2.sample(n=num)) for _ in range(num)]
    from_df1 = [first for first, _ in draws]
    from_df2 = [second for _, second in draws]
    return pd.concat(from_df1 + from_df2)

# No copy() is needed here: this version of sample() only reads E_df and M_df and never assigns into them.
baby = sample(E_df, M_df, 50)