{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b1f9b75a-f626-487d-857a-27717aa788bf",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Install into the environment of the running kernel: %pip targets the\n",
    "# kernel's interpreter, and sys.executable does the same for the model download.\n",
    "import sys\n",
    "\n",
    "%pip install -U pip setuptools wheel\n",
    "%pip install -U spacy\n",
    "!\"{sys.executable}\" -m spacy download en_core_web_sm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f08b8fa4-59d5-4a51-8979-d29aa7915d32",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import spacy\n",
    "from spacy import displacy\n",
    "\n",
    "nlp = spacy.load('en_core_web_sm')\n",
    "\n",
    "\n",
    "def highlight_root(sentence):\n",
    "    \"\"\"Highlight the syntactic root of `sentence` and return the root token.\n",
    "\n",
    "    The text is parsed with the notebook-level `nlp` pipeline. The first\n",
    "    token whose dependency label is 'ROOT' is rendered as a highlighted\n",
    "    span through displaCy's manual entity mode. If the parse contains\n",
    "    several ROOT tokens, only the first one is used.\n",
    "\n",
    "    Args:\n",
    "        sentence: Text to parse and display.\n",
    "\n",
    "    Returns:\n",
    "        The root token, or None when the parse contains no 'ROOT' token\n",
    "        (in that case \"No root found.\" is printed and nothing is rendered).\n",
    "    \"\"\"\n",
    "    doc = nlp(sentence)\n",
    "\n",
    "    # First token labelled ROOT, or None if the parse has none.\n",
    "    root = next((token for token in doc if token.dep_ == 'ROOT'), None)\n",
    "    if root is None:\n",
    "        print(\"No root found.\")\n",
    "        return None\n",
    "\n",
    "    # Manual mode expects character offsets into the text being rendered.\n",
    "    highlighted = [{\n",
    "        'text': sentence,\n",
    "        'ents': [{\n",
    "            'start': root.idx,\n",
    "            'end': root.idx + len(root.text),\n",
    "            'label': '',\n",
    "        }],\n",
    "        'title': None,\n",
    "    }]\n",
    "    displacy.render(highlighted, style='ent', manual=True)\n",
    "    return root"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2d58bfcd-a222-4da2-970c-72326c478861",
   "metadata": {},
   "outputs": [],
   "source": [
    "sentence = \"The spectacular aurora light displays that appear in Earth’s atmosphere around the north and south magnetic poles were once mysterious phenomena.\"\n",
    "\n",
    "doc = nlp(sentence)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "42ee6c93-b37d-494a-82d6-fb97a0f7acba",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print the part-of-speech tag of every token\n",
    "\n",
    "for token in doc:\n",
    "    print(token.text, token.pos_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "517506f7-240a-4106-a62b-33dc7f86a391",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Highlight the predicate verb (syntactic root) of the main clause\n",
    "\n",
    "root = highlight_root(sentence)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e6c5898f-4b0c-4e6e-81b5-0e8231c0e9cd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Simplified version of the main clause: the root plus its direct dependents\n",
    "\n",
    "if root is None:\n",
    "    print(\"No root found; nothing to simplify.\")\n",
    "else:\n",
    "    # Order by position in the sentence instead of assuming the root\n",
    "    # always comes right after its first dependent.\n",
    "    core_tokens = sorted([root, *root.children], key=lambda token: token.i)\n",
    "    joined = ' '.join(token.text for token in core_tokens)\n",
    "    simplified_sentence = joined.strip().replace(\" .\", \".\")\n",
    "    # Upper-case only the first character; str.capitalize() would also\n",
    "    # lower-case the rest of the text (proper nouns, acronyms).\n",
    "    simplified_sentence = simplified_sentence[:1].upper() + simplified_sentence[1:]\n",
    "\n",
    "    print(simplified_sentence)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "df071fdb-0618-453a-8482-c818fea98a0a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Visualize the dependency relations between the parts of the sentence\n",
    "\n",
    "displacy.render(doc, style=\"dep\", options={'distance': 60})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "273694e6-2e32-48e6-9317-5353612a8c89",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the dependency graph to the file dep-graph.svg\n",
    "\n",
    "svg = displacy.render(doc, style=\"dep\", jupyter=False)\n",
    "output_path = Path(\"dep-graph.svg\")\n",
    "# write_text opens, writes and closes the file in one call; the trailing\n",
    "# semicolon keeps the returned character count out of the cell output.\n",
    "output_path.write_text(svg, encoding=\"utf-8\");"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}