Commit e93586f

Update 图3-9-3 单篇文章的词云图.R
1 parent 68c9384 commit e93586f

1 file changed: 25 additions & 21 deletions

@@ -1,41 +1,45 @@
-# Produced by the EasyCharts team,
+
+# Produced by the EasyShu team; for more articles, follow the WeChat official account [EasyShu]
 # For corrections or further study, contact us on WeChat: EasyCharts
 
 library(tm)
 library(wordcloud)
-
-Paper1<-paste(scan("Paper1.txt", what = character(0),sep = ""), collapse = " ")
-Paper2<-paste(scan("Paper2.txt", what = character(0),sep = ""), collapse = " ")
-
+Paper1<-paste(scan("Paper1.txt", what = character(0),sep = ""), collapse = " ") # read in TXT document 1
+Paper2<-paste(scan("Paper2.txt", what = character(0),sep = ""), collapse = " ") # read in TXT document 2
 tmpText<- data.frame(c(Paper1, Paper2),row.names=c("Text1","Text2"))
-
 df_title <- data.frame(doc_id=row.names(tmpText),
                        text=tmpText$c.Paper1..Paper2.)
-
 ds <- DataframeSource(df_title)
-corp = Corpus(ds)
-corp = tm_map(corp,removePunctuation)
-corp = tm_map(corp,PlainTextDocument)
-corp = tm_map(corp,removeNumbers)
-corp = tm_map(corp, function(x){removeWords(x,stopwords())})
-
+# create a data-frame data source: the first column is the document id (doc_id), the second is the document text
+corp <- VCorpus(ds)
+# load the texts in the document collection and build the corpus
+corp<- tm_map(corp,removePunctuation) # remove punctuation from the corpus
+corp <- tm_map(corp,PlainTextDocument) # convert to plain text documents
+corp <- tm_map(corp,removeNumbers) # remove numbers
+corp <- tm_map(corp, function(x){removeWords(x,stopwords())}) # filter out stop words
 term.matrix <- TermDocumentMatrix(corp)
-term.matrix <- as.matrix(term.matrix)
+# TermDocumentMatrix() tokenizes the processed corpus and builds the term-frequency weight matrix
+
+term.matrix <- as.matrix(term.matrix) # term frequencies
 colnames(term.matrix) <- c("Paper1","paper2")
+df<-data.frame(term.matrix)
+write.csv(df,'term_matrix.csv') # export the term-frequency results for the two papers
 
-#------------------------------------------------------------------------------------------------------
-comparison.cloud(term.matrix, max.words=300, random.order=FALSE, rot.per=.15, c(4,0.4), title.size=1.4)
+#--------------------------------------- Import the data ---------------------------------------------
+df<-read.csv('term_matrix.csv',header=TRUE,row.names=1)
 
-comparison.cloud(term.matrix,max.words=300,random.order=FALSE,colors=c("#00B2FF", "red"))
-commonality.cloud(term.matrix,max.words=100,random.order=FALSE,color="#E7298A")
+#--------------------------------------- Comparison of the two papers --------------------------------
+comparison.cloud(df, max.words=300, random.order=FALSE, rot.per=.15, c(4,0.4), title.size=1.4)
+
+comparison.cloud(df,max.words=300,random.order=FALSE,colors=c("#00B2FF", "red"))
+commonality.cloud(df,max.words=100,random.order=FALSE,color="#E7298A")
 
 
 # comparison cloud
-comparison.cloud(term.matrix, random.order=FALSE,
+comparison.cloud(df, random.order=FALSE,
                  colors = c("#00B2FF", "red", "#FF0099", "#6600CC"),
                  title.size=1.5, max.words=500)
 
-#------------------------------------------------------------------------------------------------------
-df<-data.frame(term.matrix)
+#--------------------------------------- Word cloud for a single paper -------------------------------
 #Colors<-colorRampPalette(rev(brewer.pal(9,'RdBu')))(length(df$Paper1>10))
 wordcloud(row.names(df) , df$Paper1 , min.freq=10,col=brewer.pal(8, "Dark2"), rot.per=0.3 )
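
The commit's new workflow writes the term matrix to term_matrix.csv and then feeds the reloaded data frame df to comparison.cloud(), commonality.cloud(), and wordcloud(). Below is a minimal sketch of that round trip, not part of the commit: the toy matrix, its words and counts, and the temporary CSV path are all made up for illustration, so the plotting calls can be tried without Paper1.txt and Paper2.txt.

library(wordcloud)

# Hypothetical term-frequency matrix standing in for the one built from the corpus
toy <- matrix(c(12,  2,
                 8,  9,
                 1, 15,
                 5,  5),
              ncol = 2, byrow = TRUE,
              dimnames = list(c("data", "chart", "color", "cloud"),
                              c("Paper1", "Paper2")))

# Same export/import round trip as the committed script, but via a temp file
csv_path <- tempfile(fileext = ".csv")
write.csv(data.frame(toy), csv_path)
df_toy <- read.csv(csv_path, header = TRUE, row.names = 1)

# Comparison and commonality clouds over the two columns
comparison.cloud(df_toy, max.words = 100, random.order = FALSE,
                 colors = c("#00B2FF", "red"), title.size = 1.4)
commonality.cloud(df_toy, max.words = 100, random.order = FALSE, color = "#E7298A")

# Single-column word cloud, mirroring the last line of the script (min.freq lowered for the toy counts)
wordcloud(row.names(df_toy), df_toy$Paper1, min.freq = 1,
          colors = brewer.pal(8, "Dark2"), rot.per = 0.3)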
