From 2a72d623b8e580d7042d2e1c83e294c7f667e690 Mon Sep 17 00:00:00 2001
From: ugik <gk@ugik.com>
Date: Fri, 3 Feb 2017 15:24:02 -0500
Subject: [PATCH] Update NLTK example sentence and add a third WordNet similarity comparison

---
 ...tics - working with words-checkpoint.ipynb | 62 ++++++++-----------
 Text Analytics - working with words.ipynb     | 47 +++++++-------
 2 files changed, 52 insertions(+), 57 deletions(-)

diff --git a/.ipynb_checkpoints/Text Analytics - working with words-checkpoint.ipynb b/.ipynb_checkpoints/Text Analytics - working with words-checkpoint.ipynb
index 9b42211..866d8c6 100644
--- a/.ipynb_checkpoints/Text Analytics - working with words-checkpoint.ipynb	
+++ b/.ipynb_checkpoints/Text Analytics - working with words-checkpoint.ipynb	
@@ -1,23 +1,8 @@
 {
  "cells": [
-  {
-   "cell_type": "raw",
-   "metadata": {},
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
-  },
-  {
-   "cell_type": "raw",
-   "metadata": {},
-   "source": []
-  },
   {
    "cell_type": "code",
-   "execution_count": 150,
+   "execution_count": 33,
    "metadata": {
     "collapsed": false
    },
@@ -26,20 +11,20 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "8 ['Jim', 'is', 'bringing', 'his', 'bulldog', 'to', 'Walmart', '?']\n"
+      "10 ['Jim', 'is', 'bringing', 'his', 'bulldog', 'to', 'eat', 'at', 'Friendlys', '?']\n"
      ]
     }
    ],
    "source": [
     "from nltk.tokenize import word_tokenize\n",
-    "sentence = \"Jim is bringing his bulldog to Walmart?\"\n",
+    "sentence = \"Jim is bringing his bulldog to eat at Friendlys?\"\n",
     "tokens = word_tokenize(sentence)\n",
     "print (len(tokens), tokens)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 151,
+   "execution_count": 34,
    "metadata": {
     "collapsed": false
    },
@@ -48,7 +33,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "5 ['Jim', 'bringing', 'bulldog', 'Walmart', '?']\n"
+      "6 ['Jim', 'bringing', 'bulldog', 'eat', 'Friendlys', '?']\n"
      ]
     }
    ],
@@ -61,7 +46,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 152,
+   "execution_count": 41,
    "metadata": {
     "collapsed": false
    },
@@ -73,7 +58,8 @@
       "jim\n",
       "bring\n",
       "bulldog\n",
-      "walmart\n",
+      "eat\n",
+      "friend\n",
       "?\n"
      ]
     }
@@ -87,7 +73,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 153,
+   "execution_count": 36,
    "metadata": {
     "collapsed": false
    },
@@ -96,7 +82,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[('Jim', 'NNP'), ('bringing', 'VBG'), ('bulldog', 'JJ'), ('Walmart', 'NNP'), ('?', '.')] Nouns: []\n"
+      "[('Jim', 'NNP'), ('bringing', 'VBG'), ('bulldog', 'JJ'), ('eat', 'NN'), ('Friendlys', 'NNP'), ('?', '.')] Nouns: [('eat', 'NN')]\n"
      ]
     }
    ],
@@ -110,7 +96,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 154,
+   "execution_count": 37,
    "metadata": {
     "collapsed": false
    },
@@ -119,8 +105,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[('Jim', 'NNP', 'B-PERSON'), ('is', 'VBZ', 'O'), ('bringing', 'VBG', 'O'), ('his', 'PRP$', 'O'), ('bulldog', 'NN', 'O'), ('to', 'TO', 'O'), ('Walmart', 'NNP', 'B-PERSON'), ('?', '.', 'O')]\n",
-      "[]\n"
+      "[('Jim', 'NNP', 'B-PERSON'), ('is', 'VBZ', 'O'), ('bringing', 'VBG', 'O'), ('his', 'PRP$', 'O'), ('bulldog', 'NN', 'O'), ('to', 'TO', 'O'), ('eat', 'VB', 'O'), ('at', 'IN', 'O'), ('Friendlys', 'NNP', 'B-ORGANIZATION'), ('?', '.', 'O')]\n",
+      "[('Friendlys', 'NNP', 'B-ORGANIZATION')]\n"
      ]
     }
    ],
@@ -146,7 +132,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 155,
+   "execution_count": 38,
    "metadata": {
     "collapsed": false
    },
@@ -170,7 +156,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 156,
+   "execution_count": 39,
    "metadata": {
     "collapsed": false
    },
@@ -179,23 +165,27 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "0.8387096774193549\n",
       "0.36363636363636365\n",
-      "0.8387096774193549\n"
+      "0.13333333333333333\n"
      ]
     }
    ],
    "source": [
     "w1 = wordnet.synset('bulldog.n.01')\n",
+    "w2 = wordnet.synset('poodle.n.01')\n",
+    "print(w1.wup_similarity(w2))\n",
+    "\n",
     "w2 = wordnet.synset('car.n.01')\n",
     "print(w1.wup_similarity(w2))\n",
-    "w1 = wordnet.synset('bulldog.n.01')\n",
-    "w2 = wordnet.synset('poodle.n.01')\n",
-    "print(w1.wup_similarity(w2))"
+    "\n",
+    "w2 = wordnet.synset('space.n.01')\n",
+    "print(w1.wup_similarity(w2))\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 157,
+   "execution_count": 40,
    "metadata": {
     "collapsed": false
    },
@@ -206,8 +196,8 @@
      "text": [
       "{'bulldog', 'English_bulldog'}\n",
       "set()\n",
-      "{'rich', 'deep', 'ample', 'full-bodied', 'robust', 'plenteous', 'rich_people', 'fertile', 'productive', 'racy', 'copious', 'plentiful', 'fat'}\n",
-      "{'poor_people', 'poor', 'lean'}\n"
+      "{'productive', 'deep', 'racy', 'copious', 'ample', 'fat', 'robust', 'plenteous', 'plentiful', 'fertile', 'rich_people', 'full-bodied', 'rich'}\n",
+      "{'poor', 'poor_people', 'lean'}\n"
      ]
     }
    ],
diff --git a/Text Analytics - working with words.ipynb b/Text Analytics - working with words.ipynb
index e528af6..6d7f388 100644
--- a/Text Analytics - working with words.ipynb	
+++ b/Text Analytics - working with words.ipynb	
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 174,
+   "execution_count": 51,
    "metadata": {
     "collapsed": false
    },
@@ -11,20 +11,20 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "7 ['Jim', 'is', 'bringing', 'his', 'bulldog', 'PetSmart', '?']\n"
+      "10 ['Jim', 'is', 'bringing', 'his', 'bulldog', 'to', 'eat', 'at', 'Friendlys', '?']\n"
      ]
     }
    ],
    "source": [
     "from nltk.tokenize import word_tokenize\n",
-    "sentence = \"Jim is bringing his bulldog PetSmart?\"\n",
+    "sentence = \"Jim is bringing his bulldog to eat at Friendlys?\"\n",
     "tokens = word_tokenize(sentence)\n",
     "print (len(tokens), tokens)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 175,
+   "execution_count": 52,
    "metadata": {
     "collapsed": false
    },
@@ -33,7 +33,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "5 ['Jim', 'bringing', 'bulldog', 'PetSmart', '?']\n"
+      "6 ['Jim', 'bringing', 'bulldog', 'eat', 'Friendlys', '?']\n"
      ]
     }
    ],
@@ -46,7 +46,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 176,
+   "execution_count": 53,
    "metadata": {
     "collapsed": false
    },
@@ -58,7 +58,8 @@
       "jim\n",
       "bring\n",
       "bulldog\n",
-      "petsmart\n",
+      "eat\n",
+      "friend\n",
       "?\n"
      ]
     }
@@ -72,7 +73,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 177,
+   "execution_count": 54,
    "metadata": {
     "collapsed": false
    },
@@ -81,7 +82,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[('Jim', 'NNP'), ('bringing', 'VBG'), ('bulldog', 'JJ'), ('PetSmart', 'NNP'), ('?', '.')] Nouns: []\n"
+      "[('Jim', 'NNP'), ('bringing', 'VBG'), ('bulldog', 'JJ'), ('eat', 'NN'), ('Friendlys', 'NNP'), ('?', '.')] Nouns: [('eat', 'NN')]\n"
      ]
     }
    ],
@@ -95,7 +96,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 178,
+   "execution_count": 55,
    "metadata": {
     "collapsed": false
    },
@@ -104,8 +105,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[('Jim', 'NNP', 'B-PERSON'), ('is', 'VBZ', 'O'), ('bringing', 'VBG', 'O'), ('his', 'PRP$', 'O'), ('bulldog', 'NN', 'O'), ('PetSmart', 'NNP', 'B-ORGANIZATION'), ('?', '.', 'O')]\n",
-      "[('PetSmart', 'NNP', 'B-ORGANIZATION')]\n"
+      "[('Jim', 'NNP', 'B-PERSON'), ('is', 'VBZ', 'O'), ('bringing', 'VBG', 'O'), ('his', 'PRP$', 'O'), ('bulldog', 'NN', 'O'), ('to', 'TO', 'O'), ('eat', 'VB', 'O'), ('at', 'IN', 'O'), ('Friendlys', 'NNP', 'B-ORGANIZATION'), ('?', '.', 'O')]\n",
+      "[('Friendlys', 'NNP', 'B-ORGANIZATION')]\n"
      ]
     }
    ],
@@ -131,7 +132,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 179,
+   "execution_count": 56,
    "metadata": {
     "collapsed": false
    },
@@ -155,7 +156,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 180,
+   "execution_count": 57,
    "metadata": {
     "collapsed": false
    },
@@ -164,23 +165,27 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "0.8387096774193549\n",
       "0.36363636363636365\n",
-      "0.8387096774193549\n"
+      "0.13333333333333333\n"
      ]
     }
    ],
    "source": [
     "w1 = wordnet.synset('bulldog.n.01')\n",
+    "w2 = wordnet.synset('poodle.n.01')\n",
+    "print(w1.wup_similarity(w2))\n",
+    "\n",
     "w2 = wordnet.synset('car.n.01')\n",
     "print(w1.wup_similarity(w2))\n",
-    "w1 = wordnet.synset('bulldog.n.01')\n",
-    "w2 = wordnet.synset('poodle.n.01')\n",
-    "print(w1.wup_similarity(w2))"
+    "\n",
+    "w2 = wordnet.synset('space.n.01')\n",
+    "print(w1.wup_similarity(w2))\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 181,
+   "execution_count": 58,
    "metadata": {
     "collapsed": false
    },
@@ -191,8 +196,8 @@
      "text": [
       "{'bulldog', 'English_bulldog'}\n",
       "set()\n",
-      "{'rich', 'deep', 'ample', 'full-bodied', 'robust', 'plenteous', 'rich_people', 'fertile', 'productive', 'racy', 'copious', 'plentiful', 'fat'}\n",
-      "{'poor_people', 'poor', 'lean'}\n"
+      "{'productive', 'deep', 'racy', 'copious', 'ample', 'fat', 'robust', 'plenteous', 'plentiful', 'fertile', 'rich_people', 'full-bodied', 'rich'}\n",
+      "{'poor', 'poor_people', 'lean'}\n"
      ]
     }
    ],