Skip to content

Commit

Permalink
update NLTK example
Browse files Browse the repository at this point in the history
  • Loading branch information
ugik committed Feb 3, 2017
1 parent 564fcb7 commit 2a72d62
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 57 deletions.
Original file line number Diff line number Diff line change
@@ -1,23 +1,8 @@
{
"cells": [
{
"cell_type": "raw",
"metadata": {},
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
},
{
"cell_type": "raw",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": 150,
"execution_count": 33,
"metadata": {
"collapsed": false
},
Expand All @@ -26,20 +11,20 @@
"name": "stdout",
"output_type": "stream",
"text": [
"8 ['Jim', 'is', 'bringing', 'his', 'bulldog', 'to', 'Walmart', '?']\n"
"10 ['Jim', 'is', 'bringing', 'his', 'bulldog', 'to', 'eat', 'at', 'Friendlys', '?']\n"
]
}
],
"source": [
"from nltk.tokenize import word_tokenize\n",
"sentence = \"Jim is bringing his bulldog to Walmart?\"\n",
"sentence = \"Jim is bringing his bulldog to eat at Friendlys?\"\n",
"tokens = word_tokenize(sentence)\n",
"print (len(tokens), tokens)"
]
},
{
"cell_type": "code",
"execution_count": 151,
"execution_count": 34,
"metadata": {
"collapsed": false
},
Expand All @@ -48,7 +33,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"5 ['Jim', 'bringing', 'bulldog', 'Walmart', '?']\n"
"6 ['Jim', 'bringing', 'bulldog', 'eat', 'Friendlys', '?']\n"
]
}
],
Expand All @@ -61,7 +46,7 @@
},
{
"cell_type": "code",
"execution_count": 152,
"execution_count": 41,
"metadata": {
"collapsed": false
},
Expand All @@ -73,7 +58,8 @@
"jim\n",
"bring\n",
"bulldog\n",
"walmart\n",
"eat\n",
"friend\n",
"?\n"
]
}
Expand All @@ -87,7 +73,7 @@
},
{
"cell_type": "code",
"execution_count": 153,
"execution_count": 36,
"metadata": {
"collapsed": false
},
Expand All @@ -96,7 +82,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[('Jim', 'NNP'), ('bringing', 'VBG'), ('bulldog', 'JJ'), ('Walmart', 'NNP'), ('?', '.')] Nouns: []\n"
"[('Jim', 'NNP'), ('bringing', 'VBG'), ('bulldog', 'JJ'), ('eat', 'NN'), ('Friendlys', 'NNP'), ('?', '.')] Nouns: [('eat', 'NN')]\n"
]
}
],
Expand All @@ -110,7 +96,7 @@
},
{
"cell_type": "code",
"execution_count": 154,
"execution_count": 37,
"metadata": {
"collapsed": false
},
Expand All @@ -119,8 +105,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[('Jim', 'NNP', 'B-PERSON'), ('is', 'VBZ', 'O'), ('bringing', 'VBG', 'O'), ('his', 'PRP$', 'O'), ('bulldog', 'NN', 'O'), ('to', 'TO', 'O'), ('Walmart', 'NNP', 'B-PERSON'), ('?', '.', 'O')]\n",
"[]\n"
"[('Jim', 'NNP', 'B-PERSON'), ('is', 'VBZ', 'O'), ('bringing', 'VBG', 'O'), ('his', 'PRP$', 'O'), ('bulldog', 'NN', 'O'), ('to', 'TO', 'O'), ('eat', 'VB', 'O'), ('at', 'IN', 'O'), ('Friendlys', 'NNP', 'B-ORGANIZATION'), ('?', '.', 'O')]\n",
"[('Friendlys', 'NNP', 'B-ORGANIZATION')]\n"
]
}
],
Expand All @@ -146,7 +132,7 @@
},
{
"cell_type": "code",
"execution_count": 155,
"execution_count": 38,
"metadata": {
"collapsed": false
},
Expand All @@ -170,7 +156,7 @@
},
{
"cell_type": "code",
"execution_count": 156,
"execution_count": 39,
"metadata": {
"collapsed": false
},
Expand All @@ -179,23 +165,27 @@
"name": "stdout",
"output_type": "stream",
"text": [
"0.8387096774193549\n",
"0.36363636363636365\n",
"0.8387096774193549\n"
"0.13333333333333333\n"
]
}
],
"source": [
"w1 = wordnet.synset('bulldog.n.01')\n",
"w2 = wordnet.synset('poodle.n.01')\n",
"print(w1.wup_similarity(w2))\n",
"\n",
"w2 = wordnet.synset('car.n.01')\n",
"print(w1.wup_similarity(w2))\n",
"w1 = wordnet.synset('bulldog.n.01')\n",
"w2 = wordnet.synset('poodle.n.01')\n",
"print(w1.wup_similarity(w2))"
"\n",
"w2 = wordnet.synset('space.n.01')\n",
"print(w1.wup_similarity(w2))\n"
]
},
{
"cell_type": "code",
"execution_count": 157,
"execution_count": 40,
"metadata": {
"collapsed": false
},
Expand All @@ -206,8 +196,8 @@
"text": [
"{'bulldog', 'English_bulldog'}\n",
"set()\n",
"{'rich', 'deep', 'ample', 'full-bodied', 'robust', 'plenteous', 'rich_people', 'fertile', 'productive', 'racy', 'copious', 'plentiful', 'fat'}\n",
"{'poor_people', 'poor', 'lean'}\n"
"{'productive', 'deep', 'racy', 'copious', 'ample', 'fat', 'robust', 'plenteous', 'plentiful', 'fertile', 'rich_people', 'full-bodied', 'rich'}\n",
"{'poor', 'poor_people', 'lean'}\n"
]
}
],
Expand Down
47 changes: 26 additions & 21 deletions Text Analytics - working with words.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 174,
"execution_count": 51,
"metadata": {
"collapsed": false
},
Expand All @@ -11,20 +11,20 @@
"name": "stdout",
"output_type": "stream",
"text": [
"7 ['Jim', 'is', 'bringing', 'his', 'bulldog', 'PetSmart', '?']\n"
"10 ['Jim', 'is', 'bringing', 'his', 'bulldog', 'to', 'eat', 'at', 'Friendlys', '?']\n"
]
}
],
"source": [
"from nltk.tokenize import word_tokenize\n",
"sentence = \"Jim is bringing his bulldog PetSmart?\"\n",
"sentence = \"Jim is bringing his bulldog to eat at Friendlys?\"\n",
"tokens = word_tokenize(sentence)\n",
"print (len(tokens), tokens)"
]
},
{
"cell_type": "code",
"execution_count": 175,
"execution_count": 52,
"metadata": {
"collapsed": false
},
Expand All @@ -33,7 +33,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"5 ['Jim', 'bringing', 'bulldog', 'PetSmart', '?']\n"
"6 ['Jim', 'bringing', 'bulldog', 'eat', 'Friendlys', '?']\n"
]
}
],
Expand All @@ -46,7 +46,7 @@
},
{
"cell_type": "code",
"execution_count": 176,
"execution_count": 53,
"metadata": {
"collapsed": false
},
Expand All @@ -58,7 +58,8 @@
"jim\n",
"bring\n",
"bulldog\n",
"petsmart\n",
"eat\n",
"friend\n",
"?\n"
]
}
Expand All @@ -72,7 +73,7 @@
},
{
"cell_type": "code",
"execution_count": 177,
"execution_count": 54,
"metadata": {
"collapsed": false
},
Expand All @@ -81,7 +82,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[('Jim', 'NNP'), ('bringing', 'VBG'), ('bulldog', 'JJ'), ('PetSmart', 'NNP'), ('?', '.')] Nouns: []\n"
"[('Jim', 'NNP'), ('bringing', 'VBG'), ('bulldog', 'JJ'), ('eat', 'NN'), ('Friendlys', 'NNP'), ('?', '.')] Nouns: [('eat', 'NN')]\n"
]
}
],
Expand All @@ -95,7 +96,7 @@
},
{
"cell_type": "code",
"execution_count": 178,
"execution_count": 55,
"metadata": {
"collapsed": false
},
Expand All @@ -104,8 +105,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[('Jim', 'NNP', 'B-PERSON'), ('is', 'VBZ', 'O'), ('bringing', 'VBG', 'O'), ('his', 'PRP$', 'O'), ('bulldog', 'NN', 'O'), ('PetSmart', 'NNP', 'B-ORGANIZATION'), ('?', '.', 'O')]\n",
"[('PetSmart', 'NNP', 'B-ORGANIZATION')]\n"
"[('Jim', 'NNP', 'B-PERSON'), ('is', 'VBZ', 'O'), ('bringing', 'VBG', 'O'), ('his', 'PRP$', 'O'), ('bulldog', 'NN', 'O'), ('to', 'TO', 'O'), ('eat', 'VB', 'O'), ('at', 'IN', 'O'), ('Friendlys', 'NNP', 'B-ORGANIZATION'), ('?', '.', 'O')]\n",
"[('Friendlys', 'NNP', 'B-ORGANIZATION')]\n"
]
}
],
Expand All @@ -131,7 +132,7 @@
},
{
"cell_type": "code",
"execution_count": 179,
"execution_count": 56,
"metadata": {
"collapsed": false
},
Expand All @@ -155,7 +156,7 @@
},
{
"cell_type": "code",
"execution_count": 180,
"execution_count": 57,
"metadata": {
"collapsed": false
},
Expand All @@ -164,23 +165,27 @@
"name": "stdout",
"output_type": "stream",
"text": [
"0.8387096774193549\n",
"0.36363636363636365\n",
"0.8387096774193549\n"
"0.13333333333333333\n"
]
}
],
"source": [
"w1 = wordnet.synset('bulldog.n.01')\n",
"w2 = wordnet.synset('poodle.n.01')\n",
"print(w1.wup_similarity(w2))\n",
"\n",
"w2 = wordnet.synset('car.n.01')\n",
"print(w1.wup_similarity(w2))\n",
"w1 = wordnet.synset('bulldog.n.01')\n",
"w2 = wordnet.synset('poodle.n.01')\n",
"print(w1.wup_similarity(w2))"
"\n",
"w2 = wordnet.synset('space.n.01')\n",
"print(w1.wup_similarity(w2))\n"
]
},
{
"cell_type": "code",
"execution_count": 181,
"execution_count": 58,
"metadata": {
"collapsed": false
},
Expand All @@ -191,8 +196,8 @@
"text": [
"{'bulldog', 'English_bulldog'}\n",
"set()\n",
"{'rich', 'deep', 'ample', 'full-bodied', 'robust', 'plenteous', 'rich_people', 'fertile', 'productive', 'racy', 'copious', 'plentiful', 'fat'}\n",
"{'poor_people', 'poor', 'lean'}\n"
"{'productive', 'deep', 'racy', 'copious', 'ample', 'fat', 'robust', 'plenteous', 'plentiful', 'fertile', 'rich_people', 'full-bodied', 'rich'}\n",
"{'poor', 'poor_people', 'lean'}\n"
]
}
],
Expand Down

0 comments on commit 2a72d62

Please sign in to comment.