資料視覺化之二手房資料實戰(一)
阿新 • • 發佈:2022-03-10
1、導包
import pandas as pd
from pyecharts.charts import Bar, Pie, Map, Line, Scatter
from pyecharts import options as opts
from pyecharts.render import make_snapshot
from snapshot_selenium import snapshot
from pyecharts.globals import CurrentConfig
2、讀取資料
# Load the second-hand housing listings from the Excel workbook into a DataFrame.
df = pd.read_excel(io="./xlsx/二手房資料.xlsx")
3、資料清洗
# Preview the first five records.
print(df.head())
# Summary statistics of the numeric columns (describe() reports
# count/mean/std/quantiles, not the table schema).
print(df.describe())
# Number of missing values in each column.
print(df.isnull().sum())
# Replace missing values in the elevator column with the placeholder "未知".
# Assign the result back instead of using inplace=True: an in-place fillna
# returns None (so printing it is meaningless), and filling through a column
# selection may not modify df at all under pandas copy-on-write.
df["電梯"] = df["電梯"].fillna("未知")
# Check the per-column missing-value counts again after filling.
print(df.isnull().sum())
4、資料視覺化
視覺化展示-北京各城區二手房數量地圖分佈
# Count the listings per district: group by "市區" and count the "小區" entries.
nums = df.groupby("市區")["小區"].agg("count")
# District names as a plain list.
citys = nums.index.tolist()
# Append the "區" suffix to every name (presumably so the names match the
# region names used by the "北京" map -- confirm against the map data).
city = [i + "區" for i in citys]
# Listing counts as a plain list, in the same order as the district names.
xqnum = nums.values.tolist()
# Named bj_map rather than map so the builtin map() is not shadowed.
bj_map = (
    Map(init_opts=opts.InitOpts(bg_color="white"))
    .add("", [list(z) for z in zip(city, xqnum)], "北京")
    .set_global_opts(
        title_opts=opts.TitleOpts(title="北京市二手房區分佈"),
        visualmap_opts=opts.VisualMapOpts(is_show=True, max_=3000),
    )
)
# render() writes the HTML page and returns its path; make_snapshot then
# turns that page into a PNG. If only the HTML is needed, call
# bj_map.render("beiJingMap.html") on its own and skip make_snapshot.
make_snapshot(snapshot, bj_map.render("beiJingMap.html"), "./beiJingMap.png")
視覺化展示-北京各城區二手房數量-平均價格柱狀圖
# Per-district aggregates. Both series come from the same groupby, so they
# share the same district index and stay aligned with each other.
by_district = df.groupby("市區")
district_names = by_district["小區"].agg("count").index.tolist()
listing_counts = by_district["小區"].agg("count").values.tolist()
prices = by_district["價格(萬元)"].agg("mean")
prices = prices.round(2)  # keep two decimal places
price = prices.values.tolist()

# Bar chart of listing counts with a second y-axis reserved for the price line.
bar = (
    Bar(init_opts=opts.InitOpts(bg_color="white"))
    .add_xaxis(district_names)
    .add_yaxis("數量", listing_counts)
    .extend_axis(
        yaxis=opts.AxisOpts(is_show=True, max_=900, min_=200, name="價格(萬元)")
    )
    .set_global_opts(
        yaxis_opts=opts.AxisOpts(name="數量"),
        title_opts=opts.TitleOpts(title="各城區二手房數量-平均價格柱狀圖"),
        tooltip_opts=opts.TooltipOpts(
            is_show=True, trigger="axis", axis_pointer_type="cross"
        ),
        xaxis_opts=opts.AxisOpts(
            axispointer_opts=opts.AxisPointerOpts(is_show=True, type_="shadow")
        ),
    )
)
# Average-price line bound to the extended (second) y-axis via yaxis_index=1.
line = (
    Line()
    .add_xaxis(district_names)
    .add_yaxis("價格", price, z=10, yaxis_index=1)
)
bar.overlap(line)
make_snapshot(snapshot, bar.render("./Mean房價數量.html"), "./Mean房價數量.png")
視覺化展示-二手房價格最高的TOP15
# Sort listings by price, most expensive first (in place, as before).
df.sort_values(by="價格(萬元)", ascending=False, inplace=True)
# Slice the 15 most expensive rows first, then convert only those values.
top15 = df.head(15)
# Round to whole numbers but keep them numeric: the value axis should receive
# numbers rather than pre-formatted strings.
top_price = top15["價格(萬元)"].round().astype(int).tolist()
xiaoqu = top15["小區"].tolist()
top_bar = (
    Bar(init_opts=opts.InitOpts(bg_color="white"))
    .add_xaxis(xiaoqu)
    # The bars show prices, so label the series and axes accordingly
    # (they were previously labelled "數量", i.e. quantity).
    .add_yaxis("價格(萬元)", top_price)
    .set_global_opts(
        xaxis_opts=opts.AxisOpts(name="小區"),
        yaxis_opts=opts.AxisOpts(name="價格(萬元)"),
    )
)
make_snapshot(snapshot, top_bar.render("./TOP15房價最高.html"), "./TOP15房價最高.png")
裝修情況-有無電梯(玫瑰圖)
# Bar data: number of listings for each decoration status.
zhuangxiu = df.groupby("裝修情況")["裝修情況"].agg("count")
x_zx = zhuangxiu.index.tolist()
y_num = zhuangxiu.values.tolist()
# Pie data: number of listings for each elevator category.
dianti = df.groupby("電梯")["電梯"].agg("count")
# Drop the "未知" placeholder by label. The previous positional pop() removed
# whichever group happened to sort last, which presumably was meant to be the
# placeholder but depends on the sort order of the category labels.
dianti_known = dianti.drop("未知", errors="ignore")
youdt = dianti_known.index.tolist()
dt_num = dianti_known.values.tolist()
zx_bar = (
    Bar(init_opts=opts.InitOpts(bg_color="white"))
    .add_xaxis(x_zx)
    .add_yaxis("", y_num, category_gap="50%")
    .set_global_opts(
        # "scroll" is a legend type, not an orientation, so pass it as type_.
        legend_opts=opts.LegendOpts(pos_left="85%", pos_top="63%", type_="scroll"),
        yaxis_opts=opts.AxisOpts(name="裝修情況"),
        xaxis_opts=opts.AxisOpts(name="數量"),
    )
    .set_series_opts(label_opts=opts.LabelOpts(position="right"))
    # Horizontal bars: the categories move to the y-axis.
    .reversal_axis()
)
# Rose-type ring chart placed in the lower-right area of the same canvas.
zx_pie = (
    Pie()
    .add(
        "",
        [list(z) for z in zip(youdt, dt_num)],
        radius=["8%", "25%"],
        rosetype="radius",
        center=["75%", "65%"],
    )
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}:{c}\n({d}%)"))
)
zx_bar.overlap(zx_pie)
make_snapshot(snapshot, zx_bar.render("./裝修情況.html"), "./裝修情況.png")
二手房總價與面積(散點圖)
# Scatter plot of total price against floor area for every listing.
mj = df["面積(㎡)"].tolist()
jg = df["價格(萬元)"].tolist()

scatter = Scatter(init_opts=opts.InitOpts(bg_color="white"))
scatter.add_xaxis(mj)
scatter.add_yaxis("", jg)
# The x-axis carries numeric areas, so it is declared as a value axis.
area_axis = opts.AxisOpts(type_="value", name="面積(㎡)")
price_axis = opts.AxisOpts(name="價格(萬元)")
scatter.set_global_opts(xaxis_opts=area_axis, yaxis_opts=price_axis)

# Render the HTML page, then snapshot it to a PNG.
scatter_html = scatter.render("./散點圖.html")
make_snapshot(snapshot, scatter_html, "./散點圖.png")