1. 程式人生 > 其它 >第1章 1.6 從結構化字串中提取資料

第1章 1.6 從結構化字串中提取資料

一、例子:

>>> import delorean

>>> from decimal import Decimal

>>> 

>>> log = '[2018-05-05T11:07:12.267897] - SALE - PRODUCT: 1345 - PRICE: $09.99'

>>> devide_it = log.split(' - ')

>>> print(devide_it)

['[2018-05-05T11:07:12.267897]', 'SALE', 'PRODUCT: 1345', 'PRICE: $09.99']

>>> timestamp_string, _, product_string, price_string = devide_it

>>> timestamp = delorean.parse(timestamp_string.strip('[]'))

#string.strip('[]')可以將字串前後的'['或者']'去掉,如下:

>>> print(timestamp_string.strip('[]'))

2018-05-05T11:07:12.267897

>>> product_id = int(product_string.split(': ')[-1])

>>> price = Decimal(price_string.split('$')[-1])

>>> timestamp, product_id, price

(Delorean(datetime=datetime.datetime(2018, 5, 5, 11, 7, 12, 267897), timezone='UTC'), 1345, Decimal('9.99'))

 

 

>>> print(timestamp)

Delorean(datetime=datetime.datetime(2018, 5, 5, 11, 7, 12, 267897), timezone='UTC')

>>> timestamp.shift('Asia/Shanghai')

Delorean(datetime=datetime.datetime(2018, 5, 5, 19, 7, 12, 267897), timezone='Asia/Shanghai')

#通過shift('Asia/Shanghai')轉換成上海時區

>>> timestamp.datetime

datetime.datetime(2018, 5, 5, 19, 7, 12, 267897, tzinfo=<DstTzInfo 'Asia/Shanghai' CST+8:00:00 STD>)

>>> timestamp.date

datetime.date(2018, 5, 5)

>>> print(timestamp.date)

2018-05-05

#透過 .date 提取日期;若要提取時間,可對 timestamp.datetime(標準的 datetime 物件)呼叫 .time(),即 timestamp.datetime.time()

>>> date_string = timestamp.date

>>> date_string

datetime.date(2018, 5, 5)

>>> print(date_string)

2018-05-05

>>> str(date_string)

'2018-05-05'

>>> print(timestamp.datetime)

2018-05-05 19:07:12.267897+08:00

#也可以先將datetime轉成字串,再以切片(擷取子字串)的方式取得時間要素

 

>>> log = '[2018-05-05 11:07:12.267897]'

>>> timestamp = delorean.parse(log.strip('[]'))

>>> print(timestamp)

Delorean(datetime=datetime.datetime(2018, 5, 5, 11, 7, 12, 267897), timezone='UTC')

>>> import datetime

>>> timestamp += datetime.timedelta(hours=2)

#增加2個小時

>>> print(timestamp)

Delorean(datetime=datetime.datetime(2018, 5, 5, 13, 7, 12, 267897), timezone='UTC')

>>> timestamp += datetime.timedelta(days=-2)

#減少2天(days=-2 為負值,所以日期由 5 日變為 3 日)

>>> print(timestamp)

Delorean(datetime=datetime.datetime(2018, 5, 3, 13, 7, 12, 267897), timezone='UTC')

 

 

>>> delorean.parse('2018-05-06')

Delorean(datetime=datetime.datetime(2018, 6, 5, 0, 0), timezone='UTC')

#parse 預設 dayfirst=True(日在月份之前),所以 '2018-05-06' 被解析成 6 月 5 日,得到的日期不符預期

>>> delorean.parse('2018-05-06', dayfirst=False)

Delorean(datetime=datetime.datetime(2018, 5, 6, 0, 0), timezone='UTC')

#通過dayfirst=False引數後可以得到正確的日期

 

>>> d = delorean.Delorean()

>>> print(d)

Delorean(datetime=datetime.datetime(2022, 4, 8, 12, 29, 9, 273825), timezone='UTC')

>>> d = d.shift('Asia/Shanghai')

>>> print(d)

Delorean(datetime=datetime.datetime(2022, 4, 8, 20, 29, 9, 273825), timezone='Asia/Shanghai')