Package: add 1000-hours repo in workspace (#337)
* add 1000-hours repo in workspace * update README
This commit is contained in:
161
1000-hours/public/jupyter-notebooks/spaCy.ipynb
Normal file
161
1000-hours/public/jupyter-notebooks/spaCy.ipynb
Normal file
@@ -0,0 +1,161 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b1f9b75a-f626-487d-857a-27717aa788bf",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install -U pip setuptools wheel\n",
|
||||
"!pip install -U spacy\n",
|
||||
"!python -m spacy download en_core_web_sm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f08b8fa4-59d5-4a51-8979-d29aa7915d32",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import spacy\n",
|
||||
"from spacy import displacy\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"nlp = spacy.load('en_core_web_sm')\n",
|
||||
"\n",
|
||||
"def highlight_root(sentence):\n",
|
||||
" doc = nlp(sentence)\n",
|
||||
" \n",
|
||||
" # Find the root of the sentence\n",
|
||||
" root = None\n",
|
||||
" for token in doc:\n",
|
||||
" if token.dep_ == 'ROOT':\n",
|
||||
" root = token\n",
|
||||
" break\n",
|
||||
" \n",
|
||||
" # Highlight the root entity in the output\n",
|
||||
" if root is not None:\n",
|
||||
" root_start = root.idx\n",
|
||||
" root_end = root.idx + len(root.text)\n",
|
||||
" \n",
|
||||
" text = [{\n",
|
||||
" 'text': sentence,\n",
|
||||
" 'ents': [{\n",
|
||||
" 'start': root_start,\n",
|
||||
" 'end': root_end,\n",
|
||||
" 'label': '',\n",
|
||||
" }],\n",
|
||||
" 'title': None\n",
|
||||
" }]\n",
|
||||
" displacy.render(text, style='ent', manual=True)\n",
|
||||
" else:\n",
|
||||
" print(\"No root found.\")\n",
|
||||
" return root"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2d58bfcd-a222-4da2-970c-72326c478861",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Example sentence used by the cells below; `doc` is its parsed form.\n",
|
||||
"sentence = \"The spectacular aurora light displays that appear in Earth’s atmosphere around the north and south magnetic poles were once mysterious phenomena.\"\n",
|
||||
"\n",
|
||||
"doc = nlp(sentence)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "42ee6c93-b37d-494a-82d6-fb97a0f7acba",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 标注每个单词的词性\n",
|
||||
"\n",
|
||||
"for token in doc:\n",
|
||||
" print(token.text, token.pos_)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "517506f7-240a-4106-a62b-33dc7f86a391",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Highlight the predicate verb of the main clause\n",
|
||||
"\n",
|
||||
"root = highlight_root(sentence)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e6c5898f-4b0c-4e6e-81b5-0e8231c0e9cd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 主句的简化版本\n",
|
||||
"\n",
|
||||
"children = list(root.children)\n",
|
||||
"children.insert(1, root)\n",
|
||||
"simplified_setence = ' '.join(str(c) for c in children).strip().replace(\" .\", \".\").capitalize()\n",
|
||||
"\n",
|
||||
"print(simplified_setence)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "df071fdb-0618-453a-8482-c818fea98a0a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Visualize the dependency relations between the parts of the sentence\n",
|
||||
"\n",
|
||||
"displacy.render(doc, style=\"dep\", options={'distance': 60})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "273694e6-2e32-48e6-9317-5353612a8c89",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 将图形保存为 dep-graph.svg 文件\n",
|
||||
"\n",
|
||||
"svg = displacy.render(doc, style=\"dep\", jupyter=False)\n",
|
||||
"output_path = Path(\"dep-graph.svg\")\n",
|
||||
"output_path.open(\"w\", encoding=\"utf-8\").write(svg)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.18"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user