import docx
import jieba
from collections import Counter
import pandas as pd
import xlwt
# Word-frequency script: read a .docx file, segment its text with jieba,
# count how often each multi-character token occurs, and write the counts
# to an Excel workbook in the current working directory.

# Load the source document from a hard-coded absolute path.
document = docx.Document(r"C:\Users\fj\Desktop\讲话稿.docx")

# Join the text of every paragraph with a single space so the segmenter
# receives one continuous string.
content = " ".join([para.text for para in document.paragraphs])

# Segment the text; cut_all=False is passed explicitly
# (NOTE(review): this is jieba's non-"full" mode — see the jieba README).
seg_list = jieba.cut(content, cut_all=False)

# Keep only tokens longer than one character; this drops single characters
# (including the single-space separators inserted above).
seg_list = [
    word
    for word in seg_list
    if len(word) > 1
]

# Tally occurrences of each remaining token.
counter = Counter(seg_list)

# Build a two-column table (word, frequency) and save it without the index.
# The string literals below must use ASCII double quotes: the typographic
# quotes (U+201C / U+201D) previously used here are not valid Python string
# delimiters and made the whole file fail with a SyntaxError.
data = pd.DataFrame(list(counter.items()), columns=["词语", "频率"])
data.to_excel("词频统计.xlsx", index=False)