feat: - vector
This commit is contained in:
@@ -0,0 +1,37 @@
|
||||
import jieba
|
||||
import jieba.analyse
|
||||
|
||||
# Sample sentence segmented by each of the jieba modes demonstrated below.
text = "我爱自然语言处理技术"

# Each entry pairs the label to print with the tokens produced by one mode.
# Sample outputs recorded by the original author:
#   precise mode (cut_all=False): 我/爱/自然语言/处理/技术
#   full mode (cut_all=True):     我/爱/自然/自然语言/语言/处理/技术
#   search-engine mode:           我/爱/自然/语言/自然语言/处理/技术
for label, result in (
    ("精确模式:", jieba.cut(text, cut_all=False)),
    ("全模式:", jieba.cut(text, cut_all=True)),
    ("搜索引擎模式:", jieba.cut_for_search(text)),
):
    # Tokens are joined with "/" so the segment boundaries are visible.
    print(label, "/".join(result))
|
||||
|
||||
|
||||
# 关键词提取
|
||||
def main():
    """Print the top keywords of a fixed sample sentence, ranked two ways.

    The sentence is scored with ``jieba.analyse.extract_tags`` (labelled
    "TF-IDF" in the output) and with ``jieba.analyse.textrank``; each call
    asks for the five best keywords together with their weights. Both
    rankings are computed before anything is printed.
    """
    first_clause = "自然语言处理是人工智能和语言学领域的重要分支,"
    second_clause = "研究如何让计算机理解和生成人类语言。"
    sentence = first_clause + second_clause
    top_k = 5

    # Keyword/weight pairs from the TF-IDF based extractor.
    tfidf_ranked = jieba.analyse.extract_tags(sentence, topK=top_k, withWeight=True)
    # Keyword/weight pairs from the TextRank based extractor.
    textrank_ranked = jieba.analyse.textrank(sentence, topK=top_k, withWeight=True)

    # TF-IDF section: weights are shown with two decimals.
    print("TF-IDF")
    for term, score in tfidf_ranked:
        print(f"{term}:{score:.2f}")

    # TextRank section: preceded by a blank line, weights with four decimals.
    print("\nTextRank:")
    for term, score in textrank_ranked:
        print(f"{term}: {score:.4f}")
|
||||
|
||||
|
||||
# Run the keyword-extraction demo. The call is unguarded, so it also runs
# whenever this module is imported.
# NOTE(review): consider an `if __name__ == "__main__":` guard if this file is
# ever imported rather than executed directly - confirm intended usage.
main()
|
||||
Reference in New Issue
Block a user