python學習筆記之讀寫excel檔案
阿新 • • 發佈:2018-12-14
python 處理excel資料的兩種方式:
- 首選 pandas 庫裡的 `pandas.read_excel` 函式,相對比較簡單
- 其次使用 `xlrd` 庫,感覺沒有 pandas 好用
- 如何寫 excel,後續更新
#coding=utf-8
"""
Created on Wed Nov 28 18:39:17 2018
@author: **
"""
import xlrd
import xlwt
import pandas
import numpy as np
def pandas_parse_xls(filename, imgname_col_index=None, sub_index=None):
    """
    Parse an Excel sheet into image names and per-row standard deviations.

    reference:
        http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_excel.html#pandas.read_excel
    args:
        filename: path of the workbook, handed to pandas.read_excel.
        imgname_col_index: list of int, column indices of the image-name
            field. Defaults to [1] when None.
        sub_index: list of int, column indices of the subjective-score
            area. Defaults to columns 3..14 (the 4th to the 15th column)
            when None.
    return:
        (imgname_list, std_array):
        imgname_list is a list with one entry per data row (a plain str
        when a single name column is selected, a list of str otherwise);
        std_array is a 1-D ndarray holding the sample standard deviation
        (ddof=1) of the score columns of each row.
    """
    # Resolve defaults inside the call so that no mutable default object is
    # shared between calls, and so that an explicit sub_index is honoured
    # (it used to be ignored, leaving the score columns undefined).
    name_cols = [1] if imgname_col_index is None else list(imgname_col_index)
    score_cols = list(range(3, 15)) if sub_index is None else list(sub_index)

    # `usecols` replaces the `parse_cols` keyword that pandas removed.
    # Names are cast to str so text cells come back as plain strings.
    name_array = pandas.read_excel(filename, usecols=name_cols).astype('str').values

    # Index the column instead of np.squeeze: squeezing a one-row sheet
    # collapses the result to a bare scalar rather than a one-item list.
    if name_array.shape[1] == 1:
        imgname_list = name_array[:, 0].tolist()
    else:
        imgname_list = name_array.tolist()

    # Score area as a 2-D ndarray (rows x selected columns).
    sub_array = pandas.read_excel(filename, usecols=score_cols).values

    # Sample standard deviation across the score columns of every row.
    std_array = np.std(sub_array, axis=1, ddof=1)
    return imgname_list, std_array
def xlrd_parse_xls(path_xls):
    """
    Read a name column and a score column from the first sheet of a workbook.

    reference: https://xlrd.readthedocs.io/en/latest/api.html

    args:
        path_xls: path of the workbook, handed to xlrd.open_workbook.
    return:
        (name_list, score_list):
        name_list holds the UTF-8 encoded values of column index 1 and
        score_list holds the values of column index 15 as returned by
        xlrd; both start at row index 1, so row 0 is skipped.
    """
    name_col, score_col, first_data_row = 1, 15, 1

    workbook = xlrd.open_workbook(path_xls)
    first_sheet = workbook.sheets()[0]

    # NOTE(review): on Python 3 str.encode returns bytes, not str, and a
    # cell value without an .encode method would raise here -- confirm the
    # name column is text-only and that bytes are what callers expect.
    name_list = []
    for cell_value in first_sheet.col_values(name_col, first_data_row):
        name_list.append(cell_value.encode('utf-8'))

    score_list = first_sheet.col_values(score_col, first_data_row)
    return name_list, score_list
#==============================================================================
# result = []
# #迴圈遍歷讀取
# row ,col = table.nrows ,table.ncols
# for i in range(1,2):
# for j in range(1,row):
# #get cell value
# temp_str = table.cell_value(j,i)
# print(temp_str)
# result.append(temp_str)
#==============================================================================
def write_xls(dest_xls):
    """
    Write a one-cell demo workbook to dest_xls.

    args:
        dest_xls: destination handed to xlwt's Workbook.save.
    return:
        None. A workbook with a single sheet named 'sheet1' is saved,
        with a text label in cell (row 0, column 0).
    """
    # Create the workbook and its only sheet.
    work_book = xlwt.Workbook(encoding='ascii')
    work_sheet = work_book.add_sheet('sheet1')
    work_sheet.write(0, 0, label='Row 0, Column 0 Value')
    # Save to the caller-supplied destination; the argument used to be
    # ignored in favour of a hard-coded 'Excel_Workbook.xls'.
    work_book.save(dest_xls)
if __name__ == '__main__':
    # Script entry point: run both parsers, in order, on the same sample
    # workbook. Their return values are discarded.
    for parse in (pandas_parse_xls, xlrd_parse_xls):
        parse('MOS.xlsx')