1. 程式人生 > 實用技巧 > Day 78 量化投資與Python——Pandas

Day 78 量化投資與Python——Pandas

量化投資與Python——pandas

簡介:

案例

# -*- coding: utf-8 -*-

# @File    : pandas-基礎.py
# @Date    : 2020-06-09
# @Author  : Administrator

import pandas as pd
import dateutil
from datetime import datetime

# Walk-through of pandas basics: Series, DataFrame, and date/time helpers.
# The commented output after each print() is the output recorded by the
# original author; exact formatting may vary between pandas versions.

# ################ Series ################
a = pd.Series([4, 5, 6, 3])
# print(a)
# 0    4
# 1    5
# 2    6
# 3    3
# dtype: int64

b = pd.Series([4, 5, 6, 3], index=['a', 'b', 'c', 'd'])
print(b)
# a    4
# b    5
# c    6
# d    3
# dtype: int64

# Elements can be accessed by position as well as by label (key).
# Use .iloc for positional access: plain b[2] on a label-indexed Series is
# deprecated in recent pandas releases.
# print(b.iloc[2], b['c'])
# 6 6

# Addition aligns operands on their labels, not on their positions.
c = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
d = pd.Series([1, 2, 3, 4], index=['b', 'c', 'a', 'd'])
print(c + d)
# a    4
# b    3
# c    5
# d    8
# dtype: int64

# When the labels of the two Series differ, only matching labels are summed;
# labels present on one side only yield NaN (and the result becomes float).
e = pd.Series([1, 2, 3, 4], index=['b', 'b', 'a', 'd'])
print(e + c)
# a    4.0
# b    3.0
# b    4.0
# c    NaN
# d    8.0
# dtype: float64

# ################ DataFrame ################
f = pd.DataFrame({'one': [1, 2, 3], 'two': [4, 5, 6]})
print(f)
#    one  two
# 0    1    4
# 1    2    5
# 2    3    6

g = pd.DataFrame({'one': [1, 2, 3], 'two': [4, 5, 6]}, index=['a', 'b', 'c'])
print(g)
#    one  two
# a    1    4
# b    2    5
# c    3    6

# Columns built from Series are aligned on the union of their indexes;
# column 'one' has no label 'd', so that cell is NaN.
h = pd.DataFrame({
    'one': pd.Series([1, 2, 3], index=['a', 'b', 'c']),
    'two': pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']),
})
print(h)
#    one  two
# a  1.0    1
# b  2.0    2
# c  3.0    3
# d  NaN    4

print(h.keys())
# Index(['one', 'two'], dtype='object')

print(h.values)
# [[ 1.  1.]
#  [ 2.  2.]
#  [ 3.  3.]
#  [nan  4.]]

print(h.T)
#        a    b    c    d
# one  1.0  2.0  3.0  NaN
# two  1.0  2.0  3.0  4.0

print(h.describe())
#        one       two
# count  3.0  4.000000
# mean   2.0  2.500000
# std    1.0  1.290994
# min    1.0  1.000000
# 25%    1.5  1.750000
# 50%    2.0  2.500000
# 75%    2.5  3.250000
# max    3.0  4.000000

# ################ Date / time handling ################
now = datetime.now()

# dateutil.parser.parse() turns a date string into a datetime. The original
# code called dateutil.parser.parser(...), which only constructs a parser
# object and never parses the string.
time = dateutil.parser.parse('2001-01-01')
print(time)
# 2001-01-01 00:00:00

pd_time = pd.to_datetime([now])
print(pd_time)
# DatetimeIndex(['2020-06-28 13:43:48.554969'], dtype='datetime64[ns]', freq=None)

# Generate a sequence of dates in bulk: 30 consecutive Mondays starting from
# the first Monday on or after 2020-01-01.
time_list = pd.date_range('2020-01-01', periods=30, freq='W-MON')
print(time_list)
# DatetimeIndex(['2020-01-06', '2020-01-13', '2020-01-20', '2020-01-27',
#                '2020-02-03', '2020-02-10', '2020-02-17', '2020-02-24',
#                '2020-03-02', '2020-03-09', '2020-03-16', '2020-03-23',
#                '2020-03-30', '2020-04-06', '2020-04-13', '2020-04-20',
#                '2020-04-27', '2020-05-04', '2020-05-11', '2020-05-18',
#                '2020-05-25', '2020-06-01', '2020-06-08', '2020-06-15',
#                '2020-06-22', '2020-06-29', '2020-07-06', '2020-07-13',
#                '2020-07-20', '2020-07-27'],
#               dtype='datetime64[ns]', freq='W-MON')

Series

DataFrame