# 分词
# !pip install --upgrade paddlepaddle -i https://pypi.tuna.tsinghua.edu.cn/simple
# !pip install --upgrade paddlepaddle-gpu -i https://mirror.baidu.com/pypi/simple
# !pip install --upgrade paddlenlp -i https://pypi.org/simple
import paddle
from paddlenlp import Taskflow
# Instantiate the PaddleNLP "lexical_analysis" Taskflow pipeline.
lac = Taskflow("lexical_analysis")
# Run the pipeline on a batch of two sample sentences; the batch is passed as
# a single list argument.
text_lac = lac(
    [
        "模型评估是机器学习中重要的一环。",
        "文本生成模型可以创作出令人惊叹的文本内容。",
    ]
)
import pandas as pd
# Tabulate the analysis results. This is a bare expression: the frame is only
# displayed when run in a notebook/REPL cell and is otherwise discarded.
pd.DataFrame(data=text_lac)
# Part-of-speech / entity tag codes mapped to their Chinese display names.
# NOTE(review): presumably the tag set produced by the lexical-analysis
# pipeline above -- confirm against the PaddleNLP documentation.
_tag_names = {
    "n": "普通名词",
    "f": "方位名词",
    "s": "处所名词",
    "t": "时间",
    "nr": "人名",
    "ns": "地名",
    "nt": "机构名",
    "nw": "作品名",
    "nz": "其他专名",
    "v": "普通动词",
    "vd": "动副词",
    "vn": "名动词",
    "a": "形容词",
    "ad": "副形词",
    "an": "名形词",
    "d": "副词",
    "m": "数量词",
    "q": "量词",
    "r": "代词",
    "p": "介词",
    "c": "连词",
    "u": "助词",
    "xc": "其他虚词",
    "w": "标点符号",
    "PER": "人名",
    "LOC": "地名",
    "ORG": "机构名",
    "TIME": "时间",
}
# Keep `data` as a list of (tag, name) tuples in declaration order; every tag
# key is unique, so the dict round-trip loses no rows.
data = list(_tag_names.items())
# Two-column lookup table: tag code and its display name.
pos_tag_df = pd.DataFrame(data=data, columns=["tags", "词性"])