Python2簡單任務程式
阿新 • • 發佈:2018-12-19
# Task 0201: copy selected fields of every line containing 'bor' from
# spider.log into ans0201.csv.
# Both files are managed by a single `with`, so they are closed even when a
# line fails to parse.
with open('spider.log', 'r') as fp, open('ans0201.csv', 'w') as fo:
    for line in fp:
        if 'bor' not in line:
            continue
        # The third comma-separated field is itself a ';'-separated record.
        record = line.split(',')[2].split(';')
        # Emit sub-fields 0, 1, 2 and 7 as one comma-separated row.
        fo.write(','.join([record[0], record[1], record[2], record[7]]) + '\n')
import re
import urllib2
# Task 0202: average every "subject-rate" value found in a saved review page
# and write the mean, with four decimals, to ans0202.txt.
response = urllib2.urlopen('file:///C:/Users/jxx/gongcheng/arg/task0202/movie_review.htm')
try:
    html = response.read()
finally:
    # Release the handle even if reading fails.
    response.close()
# Raw string so the backslashes reach the regex engine untouched.
tmp = re.findall(r'<span class="subject-rate">(\d*?\.\d*?)</span>', html)
pf = [float(x) for x in tmp]
if not pf:
    # Fail with a clear message instead of a bare ZeroDivisionError below.
    raise ValueError('no subject-rate values found in movie_review.htm')
avg = sum(pf) / len(pf)
with open('ans0202.txt', 'w') as fo:
    fo.write('%.4f' % avg)
# coding:utf-8
import pandas as pd
# Task 0301: for one film, write "<days>,<column-7 total per day>" to
# ans0301.dat.
da = pd.read_csv('film_log3.csv', header=None, sep=';')
# Work on an explicit copy so the column assignments below do not write into
# a slice of `da` (avoids pandas' SettingWithCopyWarning).
db = da.loc[:, [0, 1, 2, 7]].copy()
# Columns 1 and 2 are parsed as dates (presumably start / end of a record).
db[1] = pd.to_datetime(db[1])
db[2] = pd.to_datetime(db[2])
# Column 7 keeps the piece that follows the first ')' and is parsed as float.
db[7] = db[7].str.split(')').str[1].astype('float')
dc = db[db[0] == '《衝上雲霄》']
# Inclusive day count from the earliest column-1 date to the latest column-2 date.
days = (dc[2].max() - dc[1].min()).days + 1
avg = sum(dc[7]) / days
with open('ans0301.dat', 'w') as fo:
    fo.write('%d,%.6f' % (days, avg))
# coding:utf-8
import os
import pandas as pd
import matplotlib.pyplot as plt
# Task 0302: weekly average of column 7 for three films, written to
# ans0302.dat (sorted descending) and drawn as a bar chart.
da = pd.read_csv('film_log3.csv', header=None, sep=';')
# Explicit copy: the assignments below must not target a slice of `da`.
db = da.loc[:, [0, 1, 2, 7]].copy()
db[1] = pd.to_datetime(db[1])
db[2] = pd.to_datetime(db[2])
# Column 7 keeps the piece that follows the first ')' and is parsed as float.
db[7] = db[7].str.split(')').str[1].astype('float')
movie = ['《將錯就錯》', '《衝上雲霄》', '《萬物生長》']
pf = []
for title in movie:
    dc = db[db[0] == title]
    # Inclusive day count over the film's whole run.
    days = (dc[2].max() - dc[1].min()).days + 1
    # Number of started weeks (ceiling of days / 7). Floor division keeps
    # this an integer on Python 3 as well as Python 2.
    weeks = (days + 6) // 7
    pf.append(sum(dc[7]) / weeks)
pf2 = sorted(pf, reverse=True)
with open('ans0302.dat', 'w') as fo:
    fo.write('%.6f,%.6f,%.6f' % (pf2[0], pf2[1], pf2[2]))
plt.rc('font', family='SimHei', size=8)
# Start from a fresh figure so nothing drawn earlier leaks into this chart.
plt.figure()
plt.xlabel(u'電影名稱')
plt.ylabel(u'票房收入(萬元)')
plt.title(u'影片周平均票房直方圖')
plt.xticks((range(3)), (u'《將錯就錯》', u'《衝上雲霄》', u'《萬物生長》'))
# Bars follow the order of `movie` (unsorted `pf`). Positions and heights are
# passed positionally because the `left=` keyword no longer exists in
# matplotlib >= 2.0.
plt.bar(range(3), pf, width=0.35, align='center')
plt.savefig('ans0302.png')
# NOTE(review): this only changes the file extension; the image data is
# still whatever savefig wrote for '.png'.
os.rename('ans0302.png', 'ans0302.jpg')
# coding:utf-8
import os
import pandas as pd
import matplotlib.pyplot as plt
# Task 0303 data preparation: load the log and derive per-row daily averages.
da = pd.read_csv('film_log3.csv', header=None, sep=';')
# Explicit copy: new columns are added below and must not target a slice of `da`.
db = da.loc[:, [0, 1, 2, 7]].copy()
db[1] = pd.to_datetime(db[1])
db[2] = pd.to_datetime(db[2])
# Column 7 keeps the piece that follows the first ')' and is parsed as float.
db[7] = db[7].str.split(')').str[1].astype('float')
# Column 12: inclusive number of days between the column-1 and column-2 dates.
db[12] = (db[2] - db[1]).dt.days + 1
# Column 712: column 7 spread evenly over those days (value per day).
db[712] = db[7] / db[12]
def ans(x):
    """Return one value per started week of film *x*'s run.

    Reads the module-level frame ``db``. The run spans the earliest
    column-1 date to the latest column-2 date of the rows whose column 0
    equals *x*. Each week's value is the number of days in that week (7,
    or fewer for the last one) times the sum of the per-day column 712
    over all of the film's rows.
    """
    dc = db[db[0] == x]
    days = (dc[2].max() - dc[1].min()).days + 1
    # The per-day total is the same for every week, so compute it once.
    per_day = sum(dc[712])
    return [min(7, days - start) * per_day for start in range(0, days, 7)]
# Task 0303 output: write three selected weekly values and plot all weeks.
# Each film's weekly series is computed once and reused for file and plot.
weekly_a = ans('《將錯就錯》')
weekly_b = ans('《衝上雲霄》')
weekly_c = ans('《萬物生長》')
with open('ans0303.dat', 'w') as fo:
    # First week of the first film, second week of the second, third week
    # of the third.
    fo.write('%.6f,%.6f,%.6f' % (weekly_a[0], weekly_b[1], weekly_c[2]))
plt.rc('font', family='SimHei', size=8)
# Start from a fresh figure so nothing drawn earlier leaks into this chart.
plt.figure()
plt.xlabel(u'時間(周)')
plt.ylabel(u'票房收入(萬元)')
plt.title(u'影片周票房變化折線圖')
plt.plot(weekly_a, '-ob', label=u'《將錯就錯》')
plt.plot(weekly_b, '--og', label=u'《衝上雲霄》')
plt.plot(weekly_c, ':or', label=u'《萬物生長》')
plt.xticks(range(7))
plt.legend(loc=1)
plt.savefig('ans0303.png')
# NOTE(review): this only changes the file extension; the image data is
# still whatever savefig wrote for '.png'.
os.rename('ans0303.png', 'ans0303.jpg')
# coding:utf-8
import os
import datetime
import pandas as pd
import matplotlib.pyplot as plt
# Task 0304 data preparation: load the log and derive per-row daily averages.
da = pd.read_csv('film_log3.csv', header=None, sep=';')
# Explicit copy: new columns are added below and must not target a slice of `da`.
# Column 8 is later compared against city names.
db = da.loc[:, [1, 2, 7, 8]].copy()
db[1] = pd.to_datetime(db[1])
db[2] = pd.to_datetime(db[2])
# Column 7 keeps the piece that follows the first ')' and is parsed as float.
db[7] = db[7].str.split(')').str[1].astype('float')
# Column 12: inclusive number of days between the column-1 and column-2 dates.
db[12] = (db[2] - db[1]).dt.days + 1
# Column 712: column 7 spread evenly over those days (value per day).
db[712] = db[7] / db[12]
def ans(x):
    """Return three totals for city *x*, bucketed by when each row ends.

    Reads the module-level frame ``db``. Only rows whose column 8 equals
    *x*, whose column-1 date is before 2016-04-01 and whose column-2 date
    is on or after 2016-01-01 are used. Each row contributes
    ``(column-2 date - 2016-01-01).days * column 712``. Rows are grouped
    by column-2 date: up to 2016-02-01, after that up to 2016-03-01, and
    later than 2016-03-01.

    Returns a 3-tuple ``(first, second, third)``; an empty bucket yields 0.
    """
    jan_1 = datetime.datetime(2016, 1, 1)
    feb_1 = datetime.datetime(2016, 2, 1)
    mar_1 = datetime.datetime(2016, 3, 1)
    apr_1 = datetime.datetime(2016, 4, 1)

    rows = db[db[8] == x]
    rows = rows[(rows[1] < apr_1) & (rows[2] >= jan_1)]
    ends = rows[2]
    # Days since Jan 1 are taken from the dates themselves. The earlier
    # hand-written offsets (31 for February, 59 for March) were one day
    # short for March because 2016 is a leap year (Mar 1 - Jan 1 = 60 days).
    amount = (ends - jan_1).dt.days * rows[712]
    return (
        sum(amount[ends <= feb_1]),
        sum(amount[(ends > feb_1) & (ends <= mar_1)]),
        sum(amount[ends > mar_1]),
    )
# Task 0304 output: write both cities' three totals and plot them side by side.
# Each city's tuple is computed once and reused for file and plot.
wuhan = ans('武漢')
changsha = ans('長沙')
with open('ans0304.dat', 'w') as fo:
    # One line per city, three comma-separated values each.
    fo.write('%.6f,%.6f,%.6f' % (wuhan[0], wuhan[1], wuhan[2]) +
             '\n%.6f,%.6f,%.6f' % (changsha[0], changsha[1], changsha[2]))
plt.rc('font', family='SimHei', size=8)
# Start from a fresh figure so nothing drawn earlier leaks into this chart.
plt.figure()
# Left panel.
plt.subplot(121)
plt.plot(wuhan, '-ob')
plt.xticks(range(3))
plt.xlabel(u'時間(月)')
plt.ylabel(u'票房收入(萬元)')
plt.title(u'武漢 2016 1-3 BOR')
# Right panel.
plt.subplot(122)
plt.plot(changsha, ':or')
plt.xticks(range(3))
plt.xlabel(u'時間(月)')
plt.ylabel(u'票房收入(萬元)')
plt.title(u'長沙 2016 1-3 BOR')
plt.savefig('ans0304.png')
# NOTE(review): this only changes the file extension; the image data is
# still whatever savefig wrote for '.png'.
os.rename('ans0304.png', 'ans0304.jpg')
# coding:utf-8
import pandas as pd
# Task 0400: print and store max, min, median and mean of the 'score' column.
dda = pd.read_csv('score.log', sep=',')
score = dda['score']
# Compute each statistic once; the same tuple feeds the console and the file.
stats = (score.max(), score.min(), score.median(), score.mean())
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print('最大值為:%.2f,最小值為:%.2f,中位數為:%.2f,平均值為:%.2f' % stats)
with open('ans0400.dat', 'w') as fo:
    fo.write('%.2f,%.2f,%.2f,%.2f' % stats)
# Build `lx`, the de-duplicated list of genre tokens found in film-csv.txt.
da = pd.read_csv('film-csv.txt', sep=';')
# Raw genre cells with missing values removed.
lx1 = da['影片型別'].dropna(axis=0).values
# Split every cell on each delimiter in turn. Flattening one delimiter at a
# time yields the same tokens, in the same order, as nesting one loop per
# delimiter.
lx2 = list(lx1)
for sep in ('/', ',', '、', ' '):
    lx2 = [piece for cell in lx2 for piece in cell.split(sep)]
# Keep only tokens whose len() is exactly 6, then drop duplicates.
# NOTE(review): on Python 2 these are byte strings, so 6 presumably means
# two UTF-8 encoded CJK characters -- confirm before porting to Python 3.
lx = list(set(token for token in lx2 if len(token) == 6))
# Total of the '票房/萬' column per genre token in `lx`.
# Rows lacking either the genre or the total are dropped first.
n1b = da.loc[:, ['影片型別', '票房/萬']].dropna(axis=0)
# A row counts towards a token when its genre cell contains that token.
# regex=False makes this a literal substring test, so a token containing
# regex metacharacters cannot change the match or raise.
n1pf = [
    n1b.loc[n1b['影片型別'].str.contains(genre, regex=False), '票房/萬'].sum()
    for genre in lx
]
# Total of the '票房/萬' column per distinct value of the '導演' column.
# Rows lacking either the director or the total are dropped first.
n2b = da.loc[:, ['導演', '票房/萬']].dropna(axis=0)
# Distinct director cells, most frequent first (value_counts order).
dy = n2b['導演'].value_counts().index.values
# A row counts towards an entry when its director cell contains that entry
# as a substring. regex=False keeps names with characters such as '.' or
# '(' from being interpreted as a pattern.
n2pf = [
    n2b.loc[n2b['導演'].str.contains(director, regex=False), '票房/萬'].sum()
    for director in dy
]
n3b = da.loc[:, ['導演', '影片型別']].dropna(axis=0)
n3lx = []
for i in range(len(lx)):
n3c = n3b[n3b['影片型別'].str.contains(lx[i<