|
2 | 2 | "cells": [
|
3 | 3 | {
|
4 | 4 | "cell_type": "code",
|
5 |
| - "execution_count": 5, |
| 5 | + "execution_count": 3, |
6 | 6 | "metadata": {},
|
7 | 7 | "outputs": [],
|
8 | 8 | "source": [
|
|
11 | 11 | },
|
12 | 12 | {
|
13 | 13 | "cell_type": "code",
|
14 |
| - "execution_count": 8, |
| 14 | + "execution_count": 4, |
15 | 15 | "metadata": {},
|
16 | 16 | "outputs": [],
|
17 | 17 | "source": [
|
|
21 | 21 | },
|
22 | 22 | {
|
23 | 23 | "cell_type": "code",
|
24 |
| - "execution_count": 17, |
| 24 | + "execution_count": 5, |
25 | 25 | "metadata": {},
|
26 | 26 | "outputs": [
|
27 | 27 | {
|
28 | 28 | "name": "stdout",
|
29 | 29 | "output_type": "stream",
|
30 | 30 | "text": [
|
31 |
| - "Python\n", |
32 |
| - "is\n", |
33 |
| - "n't\n", |
34 |
| - "just\n", |
35 |
| - "a\n", |
36 |
| - "language\n", |
37 |
| - ".\n", |
38 |
| - "It\n", |
39 |
| - "'s\n", |
40 |
| - "a\n", |
41 |
| - "framework\n", |
42 |
| - "!\n" |
| 31 | + "Python PROPN\n", |
| 32 | + "is AUX\n", |
| 33 | + "n't PART\n", |
| 34 | + "just ADV\n", |
| 35 | + "a DET\n", |
| 36 | + "language NOUN\n", |
| 37 | + ". PUNCT\n", |
| 38 | + "It PRON\n", |
| 39 | + "'s AUX\n", |
| 40 | + "a DET\n", |
| 41 | + "framework NOUN\n", |
| 42 | + "! PUNCT\n" |
43 | 43 | ]
|
44 | 44 | }
|
45 | 45 | ],
|
46 | 46 | "source": [
|
47 | 47 | "doc = nlp(\"Python isn't just a language. It's a framework!\")\n",
|
48 | 48 | "for token in doc:\n",
|
49 |
| - " print(token)" |
| 49 | + " print(token, token.pos_)" |
50 | 50 | ]
|
51 | 51 | },
|
52 | 52 | {
|
53 | 53 | "cell_type": "code",
|
54 |
| - "execution_count": null, |
| 54 | + "execution_count": 7, |
55 | 55 | "metadata": {},
|
56 |
| - "outputs": [], |
57 |
| - "source": [] |
| 56 | + "outputs": [ |
| 57 | + { |
| 58 | + "name": "stdout", |
| 59 | + "output_type": "stream", |
| 60 | + "text": [ |
| 61 | + "\n", |
| 62 | + "text lemma pos ent shape punct morph \n", |
| 63 | + "------ ------ ----- ------ ----- ----- ------------------------------\n", |
| 64 | + "Hi hi INTJ Xx False \n", |
| 65 | + ", , PUNCT , True PunctType=Comm \n", |
| 66 | + "my my PRON xx False Number=Sing|Person=1|Poss=Yes|PronType=Prs\n", |
| 67 | + "name name NOUN xxxx False Number=Sing \n", |
| 68 | + "is be AUX xx False Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin\n", |
| 69 | + "Kevin Kevin PROPN PERSON Xxxxx False Number=Sing \n", |
| 70 | + ". . PUNCT . True PunctType=Peri \n", |
| 71 | + "I I PRON X False Case=Nom|Number=Sing|Person=1|PronType=Prs\n", |
| 72 | + "like like VERB xxxx False Tense=Pres|VerbForm=Fin \n", |
| 73 | + "to to PART xx False \n", |
| 74 | + "write write VERB xxxx False VerbForm=Inf \n", |
| 75 | + "Python Python PROPN Xxxxx False Number=Sing \n", |
| 76 | + "\n" |
| 77 | + ] |
| 78 | + } |
| 79 | + ], |
| 80 | + "source": [ |
| 81 | + "from wasabi import table\n", |
| 82 | + "\n", |
| 83 | + "def text_to_doctable(txt):\n", |
| 84 | + " doc = nlp(txt)\n", |
| 85 | + " header = (\"text\", \"lemma\", \"pos\", \"ent\", \"shape\", \"punct\", \"morph\")\n", |
| 86 | + " data = [(tok.text, tok.lemma_, tok.pos_, tok.ent_type_, tok.shape_, tok.is_punct, tok.morph) for tok in doc]\n", |
| 87 | + " formatted = table(data, header=header, divider=True)\n", |
| 88 | + " print(formatted)\n", |
| 89 | + "\n", |
| 90 | + "text_to_doctable(\"Hi, my name is Kevin. I like to write Python\")" |
| 91 | + ] |
58 | 92 | }
|
59 | 93 | ],
|
60 | 94 | "metadata": {
|
|
0 commit comments