.py小程式總結

阿新 • • 發佈：2020-12-21

首先，PyCharm 裡的程式碼格式化（也就是常說的「美化」）快捷鍵是【Ctrl+Alt+L】。

1、由IP地址範圍列印出範圍內的每個IP地址。業務中有太多的IP需要整理，所以批量處理一下，以做到一步完成大規模的重複步驟。

# -*-coding:utf-8-*-
# Expand each "a.b.c.START-a.b.c.END" line of addredd.txt into one address per
# line in jieguo.txt. Only the last octet is iterated; the first three octets
# are taken from the start address of the range.
# The output file is opened first (as before), and both files are now closed
# deterministically by the `with` statement.
with open('jieguo.txt', 'w') as q, open('addredd.txt') as f:
    for line in f:
        line = line.strip()
        if not line:
            # Blank lines (e.g. a trailing newline) carry no range; skip them
            # instead of failing on the missing '-' separator.
            continue
        bounds = line.split('-')
        start_octets = bounds[0].split('.')
        end_octets = bounds[1].split('.')
        prefix = '.'.join(start_octets[:3])
        ip_start = int(start_octets[3])
        ip_end = int(end_octets[3])
        # The end of the range is inclusive, hence the + 1.
        q.writelines(
            '%s.%s\n' % (prefix, i) for i in range(ip_start, ip_end + 1)
        )

2、天氣網北京昌平地區的天氣情況

# 天氣網昌平地區爬蟲案例
import requests
from lxml import etree


class WeatherSpider:
    """Fetch a weather.com.cn forecast page and extract one day's forecast.

    The configured URL is the page the surrounding article describes as the
    Changping (Beijing) forecast.
    """

    def __init__(self):
        """Set the forecast page URL and the request headers."""
        # self.url = "http://www.weather.com.cn/weather/101210404.shtml"
        self.url = "http://www.weather.com.cn/weather/101010700.shtml"
        # Browser-like User-Agent sent with every request.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) "
                          "AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/72.0.3626.119 Safari/537.36"}

    def get_url_content(self):
        """Download the forecast page and return its body decoded as text.

        A timeout is passed so a stalled connection raises instead of
        blocking the script forever.
        """
        response = requests.get(self.url, headers=self.headers, timeout=10)
        return response.content.decode()

    def get_weather_data(self, html):
        """Parse *html* and return a dict describing the second listed day.

        Raises IndexError when the page does not contain the expected
        elements (every XPath result is indexed with ``[0]``).
        """
        tmp_html = etree.HTML(html)
        # li[2] is the second entry of the day list inside the "c7d" block.
        tomorrow_doc = tmp_html.xpath(
            "//div[contains(@class,'con') and contains(@class,'today')]"
            "//div[@class='c7d']/ul/li[2]")[0]
        # NOTE(review): the last key reads "空速" although its value comes from
        # the 'win' paragraph; possibly meant "風速". Kept unchanged so existing
        # consumers of the dict keep working.
        weather_data = {
            "日期": tomorrow_doc.xpath("./h1/text()")[0],
            "天氣": tomorrow_doc.xpath("./p[@class='wea']/@title")[0],
            "最高氣溫": tomorrow_doc.xpath("./p[@class='tem']/span/text()")[0],
            "最低氣溫": tomorrow_doc.xpath("./p[@class='tem']/i/text()")[0],
            "空速": tomorrow_doc.xpath("./p[@class='win']/i/text()")[0],
        }
        return weather_data

    def run(self):
        """Download the page, extract the forecast and print it."""
        content_html = self.get_url_content()
        data = self.get_weather_data(content_html)
        print(data)

if __name__ == '__main__':
    # Script entry point: build the spider and perform one scrape.
    WeatherSpider().run()

3、合併多個表格到一個sheet裡面

# -*- coding: utf-8 -*-

# 將多個Excel檔案合併成一個
import xlrd
import xlsxwriter


# 開啟一個excel檔案
def open_xls(file):
    """Open the Excel workbook at *file* with xlrd and return it."""
    return xlrd.open_workbook(file)


# 獲取excel中所有的sheet表
def getsheet(fh):
    """Return the result of ``fh.sheets()`` for the opened workbook *fh*."""
    all_sheets = fh.sheets()
    return all_sheets


# 獲取sheet表的行數
def getnrows(fh, sheet):
    """Return the ``nrows`` of the sheet at index *sheet* in workbook *fh*."""
    return fh.sheets()[sheet].nrows


# 讀取檔案內容並返回行內容
def getFilect(file, shnum):
    """Append every row of sheet *shnum* in *file* to ``datavalue``.

    The workbook is opened again on each call. ``datavalue`` is the
    module-level list created by the script's main section; the same list
    object (holding all rows collected so far) is returned.
    """
    sheet = open_xls(file).sheets()[shnum]
    datavalue.extend(sheet.row_values(idx) for idx in range(sheet.nrows))
    return datavalue


# 獲取sheet表的個數
def getshnum(fh):
    """Return the number of sheets in the opened workbook *fh*.

    Uses ``len`` on the sheet list instead of counting in a manual loop.
    """
    return len(fh.sheets())


if __name__ == '__main__':
    # Excel files to merge.
    allxls = ['F:/test/excel1.xlsx', 'F:/test/excel2.xlsx']
    # Rows collected from every sheet of every file; getFilect appends to
    # this module-level list.
    datavalue = []
    for fl in allxls:
        fh = open_xls(fl)
        for shnum in range(getshnum(fh)):
            print("正在讀取檔案：" + str(fl) + "的第" + str(shnum) + "個sheet表的內容...")
            getFilect(fl, shnum)
    # Destination workbook for the merged rows.
    endfile = 'F:/test/excel3.xlsx'
    # The context manager closes (and thereby saves) the workbook even if a
    # write fails part-way through.
    with xlsxwriter.Workbook(endfile) as wb1:
        ws = wb1.add_worksheet()
        # Iterate datavalue directly rather than the value returned by the
        # last getFilect call, so having no sheets at all yields an empty
        # output file instead of a NameError.
        for a, row in enumerate(datavalue):
            for b, c in enumerate(row):
                ws.write(a, b, c)
    print("檔案合併完成")

4、對比兩個文字，查詢不同的資訊

# coding=utf-8
import re

# Print every line of the file 'new' that does not occur in the file 'old'.
# Lines are compared after stripping surrounding whitespace; the order and
# any duplicates of 'new' are preserved in the output.
with open('new', encoding='utf-8') as f:
    newt = [a.strip() for a in f]
with open('old', encoding='utf-8') as q:
    oldt = [b.strip() for b in q]

# A set gives constant-time membership tests instead of rescanning the whole
# old list for every new line.
old_lines = set(oldt)
for i in newt:
    if i not in old_lines:
        print('%s' % i)

5、直方圖

import numpy as np
import matplotlib.pyplot as plt

# Draw four histogram variants of the same random sample on a 2x2 grid.

# Fixed seed so the sample is the same on every run.
np.random.seed(19680801)

n_bins = 10
x = np.random.randn(1000, 3)

fig, axes = plt.subplots(nrows=2, ncols=2)
# Row-major unpacking of the 2x2 axes array.
(ax0, ax1), (ax2, ax3) = axes

# Top-left: side-by-side bars, one colour per column, with a legend.
colors = ['red', 'tan', 'lime']
ax0.hist(x, n_bins, density=True, histtype='bar', color=colors, label=colors)
ax0.legend(prop={'size': 10})
ax0.set_title('bars with legend')

# Top-right: stacked bars.
ax1.hist(x, n_bins, density=True, histtype='barstacked')
ax1.set_title('stacked bar')

# Bottom-left: stacked bars with default bins and narrower bars (no title).
ax2.hist(x, histtype='barstacked', rwidth=0.9)

# Bottom-right: first column only.
ax3.hist(x[:, 0], rwidth=0.9)
ax3.set_title('different sample sizes')

fig.tight_layout()
plt.show()

6、餅圖

# -*- coding:utf-8 -*-

import matplotlib.pyplot as plt

# Two pie charts of the same data, stacked vertically in one figure.
labels = ('Frogs', 'Hogs', 'Dogs', 'Logs')
sizes = [15, 30, 45, 10]
# Offset only the 2nd slice ('Hogs') from the centre.
explode = (0, 0.1, 0, 0)

fig1, axes = plt.subplots(2)
ax1, ax2 = axes

# Upper chart: slices labelled directly, percentages with one decimal.
ax1.pie(sizes, labels=labels, autopct='%1.1f%%', shadow=True)

# Lower chart: exploded slice, rotated start, percentages drawn outside the
# pie (pctdistance > 1); names are shown in a legend instead of on the slices.
ax2.pie(
    sizes,
    explode=explode,
    autopct='%1.2f%%',
    pctdistance=1.12,
    shadow=True,
    startangle=90,
)

# Equal aspect ratio on both axes so each pie is drawn as a circle.
for axis in (ax1, ax2):
    axis.axis('equal')

ax2.legend(labels=labels, loc='upper right')

plt.show()

7、柱狀圖

# -*- coding:utf-8 -*-

import numpy as np
import matplotlib.pyplot as plt

# Grouped bar chart: assigned vs. completed task counts for four regions.

# Font setup for the Chinese labels used below; configured up front so it is
# in place before any text is created.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

plt.subplot(1, 1, 1)

bar_width = 0.3
x = np.array([1, 2, 3, 4])
y1 = np.array([8566, 6482, 5335, 7310])
y2 = np.array([4283, 2667, 3655, 3241])

plt.bar(x, y1, width=bar_width, label="任務量")
# The second series is shifted right by one bar width so the two bars of a
# group sit side by side.
plt.bar(x + bar_width, y2, width=bar_width, label="完成量")

plt.title("全國各分割槽任務量", loc="center")

# Value labels, centred above each bar.
for a, b in zip(x, y1):
    plt.text(a, b, b, ha='center', va="bottom", fontsize=12, color="blue")

# The second series' labels must follow the shifted bars; previously they were
# anchored at the first series' x position.
for a, b in zip(x, y2):
    plt.text(a + bar_width, b, b, ha='center', va="bottom", fontsize=12, color="r")

plt.xlabel('區域')
plt.ylabel('任務情況')

# Place each tick midway between the two bars of its group.
plt.xticks(x + bar_width / 2, ["東區", "西區", "南區", "北區"])

plt.grid(False)
plt.legend()
plt.show()

8、折線圖

import numpy as np
import random
import xlrd
import xlwt

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Line plot of ten points with a numeric label above each marker.
x = list(range(1, 11))
y = [value + 2 for value in x]

plt.grid(True, linestyle="-.")
plt.plot(x, y, marker='*', color='green')

# Write each y value 2 units above its point.
for px, py in zip(x, y):
    plt.text(px, py + 2, py, ha='center', va='bottom', fontsize=10)

plt.ylim(0, 30)
plt.xlabel('the value of k')
plt.ylabel('number')
plt.title('red vs green')
plt.show()

9、資料生成Excel

# -*- coding:utf-8 -*-

import xlsxwriter

# 建立一個excel
# Write a small data table to an Excel file and embed a line chart of it.

# Create the workbook and a worksheet with the default name.
workbook = xlsxwriter.Workbook("../z周聯絡/chart_line.xlsx")
worksheet = workbook.add_worksheet()
# worksheet = workbook.add_worksheet("bug_analysis")

# Bold cell format for the header row.
bold = workbook.add_format({'bold': 1})

# -------- 1. Prepare the data and write it to the sheet --------
# The chart below reads these cells.
headings = ['Number', 'testA', 'testB']
data = [
    ['2017-9-1', '2017-9-2', '2017-9-3', '2017-9-4', '2017-9-5', '2017-9-6'],
    [10, 40, 50, 20, 10, 50],
    [30, 60, 70, 50, 40, 30],
]

# Header row in A1:C1, then one data list per column starting at row 2.
worksheet.write_row('A1', headings, bold)
for first_cell, column_values in zip(('A2', 'B2', 'C2'), data):
    worksheet.write_column(first_cell, column_values)

# -------- 2. Build the chart and insert it into the sheet --------
chart_col = workbook.add_chart({'type': 'line'})

# One entry per plotted series. "Sheet1" is the default sheet name because the
# worksheet was created without a name; if a name is passed to
# add_worksheet, these references must use that name instead.
series_specs = (
    {
        'name': '=Sheet1!$B$1',
        'categories': '=Sheet1!$A$2:$A$7',
        'values': '=Sheet1!$B$2:$B$7',
        'line': {'color': 'red'},
    },
    {
        'name': '=Sheet1!$C$1',
        'categories': '=Sheet1!$A$2:$A$7',
        'values': '=Sheet1!$C$2:$C$7',
        'line': {'color': 'yellow'},
    },
)
for spec in series_specs:
    chart_col.add_series(spec)

# Alternative list syntax for the second series:
# chart_col.add_series({
#     'name': ['Sheet1', 0, 2],
#     'categories': ['Sheet1', 1, 0, 6, 0],
#     'values': ['Sheet1', 1, 2, 6, 2],
#     'line': {'color': 'yellow'},
# })

# Chart title and axis captions.
chart_col.set_title({'name': 'The xxx site Bug Analysis'})
chart_col.set_x_axis({'name': 'Test number'})
chart_col.set_y_axis({'name': 'Sample length (mm)'})

# Chart style.
chart_col.set_style(1)

# Anchor the chart at cell A10 with a small pixel offset.
worksheet.insert_chart('A10', chart_col, {'x_offset': 25, 'y_offset': 10})

workbook.close()