1. 程式人生 > Python2簡單任務程式

Python2簡單任務程式

# Copy selected fields of every log line containing 'bor' from spider.log
# into ans0201.csv.  Each matching line is split on ',', its third piece is
# split on ';', and sub-fields 0, 1, 2 and 7 are written comma-separated.
# Both files are managed by `with` so they are closed even when a malformed
# line raises IndexError part-way through.
with open('spider.log', 'r') as fp, open('ans0201.csv', 'w') as fo:
    for i in fp:
        if 'bor' not in i:
            continue
        tmp1 = i.split(',')
        tmp2 = tmp1[2].split(';')
        wr = ','.join([tmp2[0], tmp2[1], tmp2[2], tmp2[7]])
        fo.write(wr + '\n')
import re
import urllib2
# Average every rating found in the saved review page and write it, with
# four decimals, to ans0202.txt.
resp = urllib2.urlopen('file:///C:/Users/jxx/gongcheng/arg/task0202/movie_review.htm')
try:
    html = resp.read()
finally:
    # Release the handle even if reading fails.
    resp.close()

# Raw string so the backslashes reach the regex engine untouched.
tmp = re.findall(r'<span class="subject-rate">(\d*?\.\d*?)</span>', html)
pf = [float(x) for x in tmp]
if not pf:
    # Fail with a readable message instead of a bare ZeroDivisionError.
    raise ValueError('no subject-rate values found in movie_review.htm')
avg = sum(pf) / len(pf)

with open('ans0202.txt', 'w') as fo:
    fo.write('%.4f' % avg)
# coding:utf-8
import pandas as pd
# Load the ';'-separated log without a header row and keep columns 0, 1, 2, 7.
# NOTE(review): columns 1/2 look like a start/end date pair and column 7 like
# an amount prefixed by text ending in ')' -- confirm against the data file.
da = pd.read_csv('film_log3.csv', header=None, sep=';')
db = da.loc[:, [0, 1, 2, 7]]
db[1] = pd.to_datetime(db[1])
db[2] = pd.to_datetime(db[2])
# Keep only the text after the first ')' and parse it as a float.
db[7] = db[7].str.split(')').str[1].astype('float')

# Rows whose column 0 equals the chosen title.
dc = db[db[0] == '《衝上雲霄》']
# Inclusive day count from the earliest column-1 date to the latest column-2 date.
time = (dc[2].max() - dc[1].min()).days + 1
avg = sum(dc[7]) / time

# Output format: "<days>,<average with 6 decimals>".
with open('ans0301.dat', 'w') as fo:
    fo.write('%d,%.6f' % (time, avg))
# coding:utf-8
import os
import pandas as pd
import matplotlib.pyplot as plt
# Read the headerless, ';'-separated log and keep columns 0, 1, 2 and 7.
da = pd.read_csv('film_log3.csv', sep=';', header=None)
db = da.loc[:, [0, 1, 2, 7]]
# Columns 1 and 2 are parsed as datetimes.
for date_col in (1, 2):
    db[date_col] = pd.to_datetime(db[date_col])
# Column 7: take the text following the first ')' and convert it to float.
amount_text = db[7].str.split(')').str[1]
db[7] = amount_text.astype('float')
# For each listed title, divide the sum of column 7 by the number of
# (possibly partial) weeks between the earliest column-1 date and the latest
# column-2 date, then write the three averages in descending order.
movie = ['《將錯就錯》', '《衝上雲霄》', '《萬物生長》']
pf = []
for i in movie:
    dc = db[db[0] == i]
    time = (dc[2].max() - dc[1].min()).days + 1
    # Ceiling of time / 7, written with explicit floor division so the result
    # stays an integer week count regardless of how `/` divides integers.
    week = (time + 6) // 7
    pf.append(sum(dc[7]) / week)

pf2 = sorted(pf, reverse=True)
with open('ans0302.dat', 'w') as fo:
    fo.write(','.join('%.6f' % v for v in pf2))
# Bar chart of the values in `pf`, one bar per title, saved as ans0302.jpg.
plt.rc('font', family='SimHei', size=8)
plt.xlabel(u'電影名稱')
plt.ylabel(u'票房收入(萬元)')
plt.title(u'影片周平均票房直方圖')
plt.xticks(range(3), (u'《將錯就錯》', u'《衝上雲霄》', u'《萬物生長》'))
# Bar positions and heights are passed positionally: the first parameter was
# renamed from `left` to `x` in later matplotlib releases, so the keyword
# form is not portable across versions.
plt.bar(range(3), pf, width=0.35, align='center')
plt.savefig('ans0302.png')
# os.rename refuses to overwrite an existing target on Windows, so drop any
# file left over from a previous run first.
if os.path.exists('ans0302.jpg'):
    os.remove('ans0302.jpg')
# NOTE(review): renaming does not re-encode; the .jpg file still holds PNG data.
os.rename('ans0302.png', 'ans0302.jpg')
# coding:utf-8
import os
import pandas as pd
import matplotlib.pyplot as plt
# Read the headerless, ';'-separated log and keep columns 0, 1, 2 and 7.
da = pd.read_csv('film_log3.csv', sep=';', header=None)
db = da.loc[:, [0, 1, 2, 7]]
# Columns 1 and 2 become datetimes.
db[1], db[2] = pd.to_datetime(db[1]), pd.to_datetime(db[2])
# Column 7: text after the first ')' parsed as float.
after_paren = db[7].str.split(')').str[1]
db[7] = after_paren.astype('float')
# Column 12: inclusive number of days between column 1 and column 2.
span = db[2] - db[1]
db[12] = span.dt.days + 1
# Column 712: column 7 spread evenly over those days.
db[712] = db[7].div(db[12])
def ans(x):
    """Return one value per (possibly partial) week for the title ``x``.

    Rows of the module-level ``db`` whose column 0 equals ``x`` are selected.
    The span from the earliest column-1 date to the latest column-2 date
    (inclusive) is cut into 7-day chunks; each chunk's value is its length in
    days multiplied by the sum of column 712 over the selected rows.

    Returns a list with one entry per chunk; the last chunk may be shorter
    than seven days.
    """
    dc = db[db[0] == x]
    time = (dc[2].max() - dc[1].min()).days + 1
    # The per-day total does not depend on the week, so compute it once.
    daily_total = sum(dc[712])
    # min(7, time - i) is the length of the chunk starting at day i.
    return [min(7, time - i) * daily_total for i in range(0, time, 7)]
# Evaluate each title's weekly series once and reuse it for both the data
# file and the plot.
series_1 = ans('《將錯就錯》')
series_2 = ans('《衝上雲霄》')
series_3 = ans('《萬物生長》')

# The file holds week 1 of the first title, week 2 of the second and week 3
# of the third, each with six decimals.
with open('ans0303.dat', 'w') as fo:
    fo.write('%.6f,%.6f,%.6f' % (series_1[0], series_2[1], series_3[2]))

plt.rc('font', family='SimHei', size=8)
plt.xlabel(u'時間(周)')
plt.ylabel(u'票房收入(萬元)')
plt.title(u'影片周票房變化折線圖')
plt.plot(series_1, '-ob', label=u'《將錯就錯》')
plt.plot(series_2, '--og', label=u'《衝上雲霄》')
plt.plot(series_3, ':or', label=u'《萬物生長》')
plt.xticks(range(7))
plt.legend(loc=1)
plt.savefig('ans0303.png')
# os.rename refuses to overwrite an existing target on Windows, so drop any
# file left over from a previous run first.
if os.path.exists('ans0303.jpg'):
    os.remove('ans0303.jpg')
# NOTE(review): renaming does not re-encode; the .jpg file still holds PNG data.
os.rename('ans0303.png', 'ans0303.jpg')
# coding:utf-8
import os
import datetime
import pandas as pd
import matplotlib.pyplot as plt
# Read the headerless, ';'-separated log and keep columns 1, 2, 7 and 8.
da = pd.read_csv('film_log3.csv', sep=';', header=None)
db = da.loc[:, [1, 2, 7, 8]]
# Columns 1 and 2 become datetimes.
for date_col in (1, 2):
    db[date_col] = pd.to_datetime(db[date_col])
# Column 7: text after the first ')' parsed as float.
pieces = db[7].str.split(')')
db[7] = pieces.str[1].astype('float')
# Column 12: inclusive number of days between column 1 and column 2.
day_span = (db[2] - db[1]).dt.days
db[12] = day_span + 1
# Column 712: column 7 divided by that day count.
db[712] = db[7] / db[12]
def ans(x):
    """Return three sums (January, February, March 2016) for the key ``x``.

    Rows of the module-level ``db`` whose column 8 equals ``x`` are kept when
    column 1 is before 2016-04-01 and column 2 is on or after 2016-01-01.
    Each kept row contributes ``days * db[712]``, where ``days`` is the number
    of days from 2016-01-01 to the row's column-2 date.  The row is assigned
    to a month by that same column-2 date:

    * January:  column 2 <= 2016-02-01
    * February: 2016-02-01 < column 2 <= 2016-03-01
    * March:    column 2 > 2016-03-01

    The day count is taken directly from the dates rather than from fixed
    per-month offsets, so February 2016's 29 days are counted correctly.

    Returns a 3-tuple ``(january, february, march)``.
    """
    year_start = datetime.datetime(2016, 1, 1)
    feb_start = datetime.datetime(2016, 2, 1)
    mar_start = datetime.datetime(2016, 3, 1)
    apr_start = datetime.datetime(2016, 4, 1)

    dc = db[db[8] == x]
    dd = dc[(dc[1] < apr_start) & (dc[2] >= year_start)]
    end = dd[2]

    # Days elapsed since 1 January, weighted by the per-day value.
    weighted = (end - year_start).dt.days * dd[712]

    pf1 = weighted[end <= feb_start]
    pf2 = weighted[(end > feb_start) & (end <= mar_start)]
    pf3 = weighted[end > mar_start]
    return sum(pf1), sum(pf2), sum(pf3)
# Evaluate each key's three monthly values once and reuse them for both the
# data file and the plots.
wuhan = ans('武漢')
changsha = ans('長沙')

# Two lines of three comma-separated values, six decimals each.
with open('ans0304.dat', 'w') as fo:
    fo.write('%.6f,%.6f,%.6f' % tuple(wuhan) +
             '\n%.6f,%.6f,%.6f' % tuple(changsha))

plt.rc('font', family='SimHei', size=8)

# Left panel.
plt.subplot(121)
plt.plot(wuhan, '-ob')
plt.xticks(range(3))
plt.xlabel(u'時間(月)')
plt.ylabel(u'票房收入(萬元)')
plt.title(u'武漢 2016 1-3 BOR')

# Right panel.
plt.subplot(122)
plt.plot(changsha, ':or')
plt.xticks(range(3))
plt.xlabel(u'時間(月)')
plt.ylabel(u'票房收入(萬元)')
plt.title(u'長沙 2016 1-3 BOR')

plt.savefig('ans0304.png')
# os.rename refuses to overwrite an existing target on Windows, so drop any
# file left over from a previous run first.
if os.path.exists('ans0304.jpg'):
    os.remove('ans0304.jpg')
# NOTE(review): renaming does not re-encode; the .jpg file still holds PNG data.
os.rename('ans0304.png', 'ans0304.jpg')
# coding:utf-8
import pandas as pd
# Print and save the max, min, median and mean of the 'score' column of
# score.log, each with two decimals.
dda = pd.read_csv('score.log', sep=',')
scores = dda['score']
# Compute each statistic once; both outputs share the same tuple.
stats = (scores.max(), scores.min(), scores.median(), scores.mean())
# Single parenthesised argument: prints identically whether `print` is the
# Python 2 statement or the Python 3 function.
print('最大值為:%.2f,最小值為:%.2f,中位數為:%.2f,平均值為:%.2f' % stats)
with open('ans0400.dat', 'w') as fo:
    fo.write('%.2f,%.2f,%.2f,%.2f' % stats)
# Build `lx`, the distinct tokens of length 6 found in the '影片型別' column.
# Each non-null cell is split successively on '/', ',', '、' and ' '; every
# resulting token is recorded in `lx2`.
# NOTE(review): the length-6 test presumably selects two-character CJK names
# stored as UTF-8 byte strings -- confirm before running on unicode text.
da = pd.read_csv('film-csv.txt', sep=';')
lx1 = da['影片型別'].dropna(axis=0).values
lx2 = []
lx = []
for i in lx1:
    for i2 in i.split('/'):
        for i3 in i2.split(','):
            for i4 in i3.split('、'):
                for i5 in i4.split(' '):
                    lx2.append(i5)
                    # Test only the new token; rescanning all of lx2 after
                    # every append yields the same set at quadratic cost.
                    if len(i5) == 6:
                        lx.append(i5)
lx = list(set(lx))
# For every entry of `lx`, sum '票房/萬' over the rows whose '影片型別' text
# contains that entry; results are collected in `n1pf` in the order of `lx`.
n1b = da.loc[:, ['影片型別', '票房/萬']].dropna(axis=0)
n1pf = []
for i, genre in enumerate(lx):
    has_genre = n1b['影片型別'].str.contains(genre)
    n1c = n1b[has_genre]
    n1pf.append(n1c['票房/萬'].sum())
# For every distinct '導演' value (ordered by frequency, as returned by
# value_counts), sum '票房/萬' over the rows whose '導演' text contains it;
# results are collected in `n2pf` in the order of `dy`.
n2b = da.loc[:, ['導演', '票房/萬']].dropna(axis=0)
dy = n2b['導演'].value_counts().index.values
n2pf = []
for i, director in enumerate(dy):
    # regex=False: match the name literally, so names containing characters
    # such as '(', '.' or '+' are not interpreted as a pattern.
    n2c = n2b[n2b['導演'].str.contains(director, regex=False)]
    n2pf.append(n2c['票房/萬'].sum())
n3b = da.loc[:, ['導演', '影片型別']].dropna(axis=0)
n3lx = []
for i in range(len(lx)):
    n3c = n3b[n3b['影片型別'].str.contains(lx[i<