比較均值分析思路
阿新 • • 發佈:2019-01-09
sql:
def SubDependentDataSql(self, DatabaseName, TableName, DependentVariable, IndependentVariable):
    """Build the per-group summary-statistics SQL for a compare-means analysis.

    The generated query groups the rows of ``DatabaseName.TableName`` by the
    ``DependentVariable`` column and, for every group, aggregates the
    ``IndependentVariable`` column into:

    * ``N``   -- ``COUNT(1)``
    * ``SUM`` -- ``SUM(...)``
    * ``STD`` -- ``STDDEV_SAMP(...)``
    * ``MAX`` / ``MIN`` / ``AVG``

    Rows whose trimmed value is empty or NULL in either column are excluded.
    The result is ordered by the grouping column converted to ``SIGNED``.

    Args:
        DatabaseName: Schema name, inserted unquoted.
        TableName: Table name, inserted unquoted.
        DependentVariable: Column used for ``GROUP BY`` (backtick-quoted).
        IndependentVariable: Column that is aggregated (backtick-quoted).

    Returns:
        The SQL statement as a string.

    NOTE(review): all four arguments are interpolated into the statement as
    identifiers, which cannot be bound as query parameters. Callers must make
    sure they never come from untrusted input -- confirm at the call sites.
    """
    # Named fields instead of 13 positional "{}" so each column is stated once.
    # The space before "as repDB" is required: without it the table name and
    # the "as" keyword fuse into a single (wrong) identifier.
    template = (
        " select * from ("
        " select repDB.`{dep}` as `{dep}`"
        " ,COUNT(1) as N"
        " ,SUM(convert(Ifnull(repDB.`{ind}`, 0),decimal(65,30))) as SUM"
        " ,STDDEV_SAMP(convert(Ifnull(repDB.`{ind}`,0),decimal(65,30))) as STD"
        " ,MAX(convert(Ifnull(repDB.`{ind}`, 0),decimal(65,30))) as MAX"
        " ,MIN(convert(Ifnull(repDB.`{ind}`, 0),decimal(65,30))) as MIN"
        " ,AVG(convert(Ifnull(repDB.`{ind}`, 0),decimal(65,30))) as AVG"
        " from {db}.{table} as repDB"
        " where trim(Ifnull(repDB.`{ind}`, '')) <> ''"
        " AND trim(Ifnull(repDB.`{dep}`, '')) <> ''"
        " group by repDB.`{dep}`"
        " ) as SUB"
        " order by CONVERT(SUB.`{dep}` , SIGNED) "
    )
    return template.format(
        dep=DependentVariable,
        ind=IndependentVariable,
        db=DatabaseName,
        table=TableName,
    )
求中值前先查資料sql:
def MedianValueDataSql(self, DatabaseName, TableName, DependentVariable, IndependentVariable, optionID):
    """Build the SQL that fetches one group's values, sorted, for a median.

    The generated query selects the ``IndependentVariable`` column (NULL
    replaced by 0) from ``DatabaseName.TableName`` for the rows where that
    column is non-empty and the trimmed ``DependentVariable`` column equals
    ``optionID``, ordered by the selected column.

    The statement covers a single ``optionID``; it has to be issued once for
    every option value of the dependent variable.

    Args:
        DatabaseName: Schema name, inserted unquoted.
        TableName: Table name, inserted unquoted.
        DependentVariable: Column compared against ``optionID``.
        IndependentVariable: Column whose values are returned and sorted.
        optionID: Group value to filter on, inserted into the SQL as-is.

    Returns:
        The SQL statement as a string.

    NOTE(review): ``optionID`` is written into the statement without quoting,
    so it presumably must be numeric -- confirm with callers, and never pass
    untrusted input for any argument.
    NOTE(review): ``ORDER BY`` sorts on the ``Ifnull(...)`` alias; if the
    column is stored as text the order may be lexicographic rather than
    numeric, which would skew a median -- verify against the table schema.
    """
    # Named fields keep each column name stated once instead of relying on
    # the order of eight positional arguments.
    template = (
        " select Ifnull(repDB.`{ind}`, 0) as `{ind}`"
        " from {db}.{table} as repDB"
        " where trim(Ifnull(repDB.`{ind}`, '')) <> ''"
        " AND trim(Ifnull(repDB.`{dep}`, '')) = {option}"
        " ORDER BY `{ind}`; "
    )
    return template.format(
        ind=IndependentVariable,
        dep=DependentVariable,
        db=DatabaseName,
        table=TableName,
        option=optionID,
    )
標準誤平均值
標準誤是樣本均值與總體均值的誤差估計
標準誤 = 標準差 / sqrt(樣本量),即 $SE = s / \sqrt{n}$,其中 $s$ 是由樣本計算出來的樣本標準差(對應上面 SQL 中以 STDDEV_SAMP 求得的 STD),$n$ 是樣本量(對應 N)。