使用 ggplot2 和 echarts4r 绘制我的微信好友分布图

使用 ggplot2 和 echarts4r 绘制我的微信好友分布图

今天下午随意看了一下echarts4r这个包。发现这个包对 ECharts 的封装比谢益辉的 recharts 好太多!使用起来非常容易且功能很强大。作为开篇,这篇博客介绍了如何使用 ggplot2 和 echarts4r 包绘制我的微信好友分布图(统计图表+地图)。

使用 Python 的 itchat 包获取微信好友列表数据

Python
1
2
3
4
5
6
import itchat
import pandas as pd

# Log in to WeChat (a QR code has to be scanned); hotReload=True is passed
# through to itchat unchanged.
itchat.auto_login(hotReload=True)

# Fetch the friend list, wrap it in a DataFrame and dump it to CSV so the
# R code can read it back.
friends = pd.DataFrame(itchat.get_friends(update=True))
friends.to_csv("myfriends.csv")

运行上面的代码扫描一下二维码登录微信即可获得好友列表数据集myfriends.csv了。下面使用 R 对这个数据集进行汇总。

数据集整理

R
1
2
3
4
5
6
7
8
9
10
11
12
library(dplyr)

# Load the friend list exported by the Python step.
friends <- read.csv("myfriends.csv")

# The province column is the only field needed here.
friends <- friends["Province"]

# Drop every row whose province is missing, blank, or contains Latin letters.
# Merely stripping the letters with gsub() would leave punctuation residue
# (for example a lone apostrophe) behind as a bogus province.
province <- as.character(friends$Province)
keep <- !is.na(province) &
  nzchar(trimws(province)) &
  !grepl("[A-Za-z]", province)
friends <- friends[keep, , drop = FALSE]

friends$Province <- factor(friends$Province)

# One row per province together with the number of friends living there.
df <- summarise(group_by(friends, Province), count = n())

ggplot2 柱状图

到这里数据就整理好了!先用 ggplot2 绘制一幅条形统计图:

R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
library(ggplot2)
library(RColorBrewer)

# Sort the provinces by friend count and freeze that order in the factor
# levels so the bars are drawn in ascending order.
df <- df[order(df$count), ]
df$Province <- factor(df$Province, levels = df$Province)

# Default theme for the following plots: black-and-white with a centred title.
theme_set(
  theme_bw(base_size = 18, base_family = "STSong") +
    theme(plot.title = element_text(hjust = 0.5))
)

# Horizontal bar chart, one bar per province, coloured with viridis.
ggplot(df, aes(x = Province, y = count)) +
  geom_col(aes(colour = Province, fill = Province)) +
  coord_flip() +
  scale_fill_viridis_d() +
  scale_color_viridis_d() +
  labs(
    title = "图:我的微信好友分布",
    y = "好友数量",
    subtitle = ""
  ) +
  theme(
    axis.title.y = element_blank(),
    legend.position = "none",
    plot.title = element_text(family = "STSongti-SC-Bold"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank()
  )

ggplot2 饼图

再绘制一幅饼图:

R
1
2
3
4
5
6
7
8
9
10
11
# A pie chart is a single stacked bar drawn in polar coordinates.
stacked_bar <- ggplot(df, aes(x = "temp", y = count, fill = Province)) +
  geom_col(position = "stack")

stacked_bar +
  coord_polar(theta = "y") +
  scale_fill_viridis_d("省份") +
  labs(
    title = "图:我的微信好友分布",
    subtitle = "",
    x = "",
    y = ""
  ) +
  # theme_void() replaces the whole theme, so the title tweaks come after it.
  theme_void(base_family = "STSong", base_size = 20) +
  theme(
    plot.title = element_text(hjust = 0.5, family = "STSongti-SC-Bold")
  )

ggplot2 + sf 绘制地理分布图

省份分布

下面使用 ggplot2 绘制中国地图。
首先你需要一份中国省份的 shp 地图:
国界与省界.zip

通常我们在网上找到的 shp 地图数据集直接读入 R 中都会乱码,按照这篇文章:Shapefile 中文乱码与字段名字符截断问题分析和解决方法介绍的方法,当我们遇到中文乱码的时候,可以在 shp 所在文件夹里新建一个与之同名的 cpg 文件,例如我这里新建了一个bou2_4p.cpg文件,里面写上936即可。

R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# Draw the province-level map of China with ggplot2 and sf.
library(sf)
library(ggthemes)
library(ggrepel)
library(maps)
library(latex2exp)

cn <- st_read("国界与省界/bou2_4p.shp")
# NOTE(review): st_read() already returns an sf object, so this call may not
# actually attach CRS 4490 -- confirm, and use st_set_crs() if it is required.
cn <- st_as_sf(cn, crs = 4490)

# Shorten the province names (drop administrative suffixes and ethnic
# qualifiers) so that they match the names used in the friend data.
cn$NAME <- gsub("省|自治区|维吾尔|回族|特别行政区|壮族|市", "", cn$NAME)

# Append the centroid coordinates (columns X and Y); they position the labels.
cn <- cbind(cn, st_coordinates(st_centroid(cn)))

# Rename the friend-count columns so both tables share the NAME key.
# Later snippets rely on `df` keeping these column names.
colnames(df) <- c("NAME", "count")

# Polygons of the provinces that have friends, with their counts.
# merge() takes the key through `by`; the former `id =` was silently ignored.
cn2 <- merge(cn, df, by = "NAME")

# Label data: one row per province name, restricted to provinces with friends.
cn3 <- merge(cn[!duplicated(cn$NAME), ], df, by = "NAME")

# Build axis labels such as 80°E / 10°N as plotmath expressions.
degree_labels <- function(degrees, suffix) {
  do.call(c, lapply(degrees, function(d) {
    TeX(sprintf("$%d^{o}%s$", d, suffix))
  }))
}

# Explicit breaks keep the number of tick positions equal to the number of
# labels; labels without matching breaks fail as soon as the automatic
# breaks differ in length.
lon_breaks <- seq(80, 130, by = 10)
lat_breaks <- seq(10, 50, by = 10)

p <- ggplot(data = cn) +
  # Grey base layer for every province, coloured overlay for those with data.
  geom_sf(fill = "grey90") +
  geom_sf(data = cn2, aes(fill = count)) +
  geom_label(
    data = cn3, aes(x = X, y = Y, label = NAME),
    family = "STSong", size = 3,
    nudge_y = -1
  ) +
  labs(x = "", y = "", title = "我的微信好友分布") +
  scale_fill_viridis_c(
    "好友\n数量",
    alpha = 0.4,
    breaks = c(0, 20, 40, 60, 80, 100),
    labels = c(0, 20, 40, 60, 80, 100)
  ) +
  scale_x_continuous(
    breaks = lon_breaks,
    labels = degree_labels(lon_breaks, "E")
  ) +
  scale_y_continuous(
    breaks = lat_breaks,
    labels = degree_labels(lat_breaks, "N")
  ) +
  theme(
    panel.grid.major = element_line(
      color = gray(0.5),
      linetype = "dashed",
      size = 0.5
    ),
    panel.background = element_rect(fill = "aliceblue")
  )
print(p)

市级分布

当然我们首先需要准备一份市级的 shp 数据:
地级市地图.zip
这份地图数据的编码是正确的,所以不用再处理了。

R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# City-level friend counts joined onto the prefecture-level map.
fd <- read.csv("myfriends.csv")
fd <- fd["City"]

# Strip Latin letters, commas and spaces from the city names, then discard
# entries that are missing or end up empty.
city <- gsub("[A-Z, a-z]", "", as.character(fd$City))
city <- city[!is.na(city) & city != ""]

# WeChat reports districts for some places; fold them into the city that the
# map knows about. 丰都 belongs to 重庆 and 望德堂区 to 澳门.
district_to_city <- c(
  "黄大仙区" = "香港",
  "望德堂区" = "澳门",
  "北区" = "香港",
  "宝山" = "上海",
  "北碚" = "重庆",
  "朝阳" = "北京",
  "丰都" = "重庆",
  "海淀" = "北京",
  "沙田区" = "香港",
  "台北市" = "台北",
  "杨浦" = "上海"
)
is_district <- city %in% names(district_to_city)
city[is_district] <- unname(district_to_city[city[is_district]])

# Count only after recoding, so districts folded into the same city are
# summed into a single row instead of producing duplicate rows in the join.
citydf <- as.data.frame(
  table(NAME = city),
  responseName = "count",
  stringsAsFactors = FALSE
)

citymap <- st_read("地级市地图/zgdzj.shp")

# Drop the trailing "市" so the map names match the friend data.
citymap$NAME <- gsub(pattern = "市", citymap$NAME, replacement = "")

# Keep only cities with friends. merge() takes the key through `by`; the
# former `id =` argument was silently ignored.
citydf <- merge(citymap, citydf, by = "NAME")

ggplot(data = citymap) +
  # Plain base layer for every city, coloured overlay for those with data.
  geom_sf(fill = "antiquewhite1") +
  geom_sf(data = citydf, aes(fill = count)) +
  labs(x = "", y = "", title = "我的微信好友分布") +
  scale_fill_viridis_c(
    "好友\n数量",
    alpha = 0.4,
    breaks = c(0, 20, 40, 60, 80, 100),
    labels = c(0, 20, 40, 60, 80, 100)
  ) +
  theme(
    panel.grid.major = element_line(
      color = gray(0.5),
      linetype = "dashed",
      size = 0.5
    ),
    panel.background = element_rect(fill = "aliceblue")
  )

echarts4r 柱状图

首先需要安装这个包及其辅助包:

R
1
2
3
# Install echarts4r and its companion packages from GitHub, in this order.
echarts_repos <- c(
  "JohnCoene/echarts4r.maps",
  "JohnCoene/echarts4r.assets",
  "JohnCoene/echarts4r"
)
for (repo in echarts_repos) {
  remotes::install_github(repo)
}

因为这个包非常非常大,所以可以先去 GitHub 仓库把这个包的源码下载下来,然后再自己打包安装(另外包源码里有网站的介绍文档,离线版的会加载得更快一些)。

下面绘制柱状图:
这里可能需要一些 ECharts 基础。。。

R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# echarts4r has to be attached before any e_*() function can be called.
library(echarts4r)

# Horizontal bar chart: friend count on the value (x) axis, province names on
# the category (y) axis. The outer parentheses also print the widget.
(bar <- df %>%
  e_charts(
    count,
    itemStyle = list(barBorderRadius = 7),
    color = "#c29d73"
  ) %>%
  e_bar(NAME) %>%
  e_legend(show = FALSE) %>%
  e_title(
    "我的微信好友分布",
    textStyle = list("fontSize" = 30, "fontFamily" = "STSong"),
    textAlign = "middle", left = "50%"
  ) %>%
  e_axis(
    axis = c("x"),
    axisLabel = list(
      "fontSize" = 15,
      "fontFamily" = "STSong",
      interval = 0
    ),
    type = "value"
  ) %>%
  e_axis(
    axis = c("y"),
    axisLabel = list(
      "fontSize" = 15,
      "fontFamily" = "STSong",
      interval = 0
    ),
    type = "category"
  ) %>%
  e_toolbox_feature(feature = c(
    "saveAsImage",
    "dataZoom",
    "dataView"
  )) %>%
  e_tooltip(
    trigger = "axis",
    formatter = "好友数量:{c}",
    textStyle = list("fontSize" = 15, "fontFamily" = "STSong")
  ))

# Write the widget to an HTML file with htmlwidgets.
htmlwidgets::saveWidget(bar, "微信好友分布柱状图.html")

微信好友分布柱状图.html
因为在页面内嵌入 iframe 会让页面加载得很慢,所以我还是直接截图:

echarts4r 饼图

R
1
2
3
4
5
6
7
8
# Pie chart of friends per province, styled with the "essos" theme.
pie_title_style <- list("fontSize" = 30, "fontFamily" = "STSong")

pie <- df %>%
  e_charts(NAME) %>%
  e_pie(count) %>%
  e_title(
    "我的微信好友分布",
    textStyle = pie_title_style,
    left = "0%", top = "-5"
  ) %>%
  e_theme("essos")

# Show the widget, then save it as an HTML file.
pie
htmlwidgets::saveWidget(pie, "微信好友分布饼图.html")

echarts4r 中国地图

R
1
2
3
4
5
6
7
8
9
10
11
12
# em_map() is provided by echarts4r.maps, so that package must be attached.
library(echarts4r.maps)

# Choropleth of friends per province on the bundled "China" map; the outer
# parentheses also print the widget.
(p <- df %>%
  e_charts(NAME) %>%
  em_map("China") %>%
  e_map(count, map = "China") %>%
  e_visual_map(
    count,
    color = rev(brewer.pal(3, "Greens")),
    left = "90%",
    top = "50%"
  ) %>%
  e_title(
    "我的微信好友分布",
    textStyle = list("fontSize" = 30, "fontFamily" = "STSong"),
    textAlign = "middle", left = "50%"
  ))

# Save the widget as an HTML file.
htmlwidgets::saveWidget(p, "我的微信好友分布.html")

我的微信好友分布.html

echarts4r 自定义地图

该包还提供了自定义地图的功能,只需要提供相应地区的 geojson 数据即可,这种数据网上很多,例如 ECharts GEO json、ufoe/d3js-geojson。我从 ECharts 上下载了一份广东省的 GEO json 地图:
广东省.json
然后使用这份地图数据绘制我的广东好友的分布:

R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Prepare a geojson file for Guangdong province.
# Download: http://gallery.echartsjs.com/editor.html?c=xr1IEt3r4Q
json <- jsonlite::read_json("广东省.json")

# `df` only holds province-level counts and has no City column, so the
# city-level counts for Guangdong are built from the raw friend list.
raw_friends <- read.csv("myfriends.csv")
gd_city <- as.character(raw_friends$City[which(raw_friends$Province == "广东")])
gd_city <- gd_city[!is.na(gd_city) & gd_city != ""]

# Append "市" to every city name before it is matched against the map.
# NOTE(review): assumes the geojson region names carry the "市" suffix --
# confirm against 广东省.json.
gd_df <- as.data.frame(
  table(City = paste0(gd_city, "市")),
  responseName = "count",
  stringsAsFactors = FALSE
)

gd_df %>%
  e_charts(City) %>%
  e_map_register("广东", json) %>%
  e_map(count, map = "广东") %>%
  e_visual_map(
    count,
    color = rev(brewer.pal(3, "Greens")),
    left = "90%",
    top = "50%"
  ) %>%
  e_title(
    "我的广东微信好友分布",
    textStyle = list("fontSize" = 30, "fontFamily" = "STSong"),
    textAlign = "middle", left = "50%"
  )

我的广东微信好友分布.html

# Python, R

Comments

Your browser is out-of-date!

Update your browser to view this website correctly. Update my browser now

×