Commit 072924

Kevin Barry · Kevin Barry · commit 8fa04d3a2aef · 2024-07-29T18:29:15.000-04:00
diff --git a/part11.ipynb b/part11.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -11,7 +11,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -21,40 +21,74 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Python\n",
-      "is\n",
-      "n't\n",
-      "just\n",
-      "a\n",
-      "language\n",
-      ".\n",
-      "It\n",
-      "'s\n",
-      "a\n",
-      "framework\n",
-      "!\n"
+      "Python PROPN\n",
+      "is AUX\n",
+      "n't PART\n",
+      "just ADV\n",
+      "a DET\n",
+      "language NOUN\n",
+      ". PUNCT\n",
+      "It PRON\n",
+      "'s AUX\n",
+      "a DET\n",
+      "framework NOUN\n",
+      "! PUNCT\n"
      ]
     }
    ],
    "source": [
     "doc = nlp(\"Python isn't just a language. It's a framework!\")\n",
     "for token in doc:\n",
-    "    print(token)"
+    "    print(token, token.pos_)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "text     lemma    pos     ent      shape   punct   morph                         \n",
+      "------   ------   -----   ------   -----   -----   ------------------------------\n",
+      "Hi       hi       INTJ             Xx      False                                 \n",
+      ",        ,        PUNCT            ,       True    PunctType=Comm                \n",
+      "my       my       PRON             xx      False   Number=Sing|Person=1|Poss=Yes|PronType=Prs\n",
+      "name     name     NOUN             xxxx    False   Number=Sing                   \n",
+      "is       be       AUX              xx      False   Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin\n",
+      "Kevin    Kevin    PROPN   PERSON   Xxxxx   False   Number=Sing                   \n",
+      ".        .        PUNCT            .       True    PunctType=Peri                \n",
+      "I        I        PRON             X       False   Case=Nom|Number=Sing|Person=1|PronType=Prs\n",
+      "like     like     VERB             xxxx    False   Tense=Pres|VerbForm=Fin       \n",
+      "to       to       PART             xx      False                                 \n",
+      "write    write    VERB             xxxx    False   VerbForm=Inf                  \n",
+      "Python   Python   PROPN            Xxxxx   False   Number=Sing                   \n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "from wasabi import table\n",
+    "\n",
+    "def text_to_doctable(txt):\n",
+    "    \"\"\"Run `txt` through `nlp` and print a wasabi table with one row per token.\"\"\"\n",
+    "    columns = (\"text\", \"lemma\", \"pos\", \"ent\", \"shape\", \"punct\", \"morph\")\n",
+    "    # Each row lists a token's attributes in the same order as `columns`.\n",
+    "    rows = [(t.text, t.lemma_, t.pos_, t.ent_type_, t.shape_, t.is_punct, t.morph) for t in nlp(txt)]\n",
+    "    print(table(rows, header=columns, divider=True))\n",
+    "\n",
+    "text_to_doctable(\"Hi, my name is Kevin. I like to write Python\")"
+   ]
   }
  ],
  "metadata": {