From 2a72d623b8e580d7042d2e1c83e294c7f667e690 Mon Sep 17 00:00:00 2001 From: ugik Date: Fri, 3 Feb 2017 15:24:02 -0500 Subject: [PATCH] update NLTK example --- ...tics - working with words-checkpoint.ipynb | 62 ++++++++----------- Text Analytics - working with words.ipynb | 47 +++++++------- 2 files changed, 52 insertions(+), 57 deletions(-) diff --git a/.ipynb_checkpoints/Text Analytics - working with words-checkpoint.ipynb b/.ipynb_checkpoints/Text Analytics - working with words-checkpoint.ipynb index 9b42211..866d8c6 100644 --- a/.ipynb_checkpoints/Text Analytics - working with words-checkpoint.ipynb +++ b/.ipynb_checkpoints/Text Analytics - working with words-checkpoint.ipynb @@ -1,23 +1,8 @@ { "cells": [ - { - "cell_type": "raw", - "metadata": {}, - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "raw", - "metadata": {}, - "source": [] - }, { "cell_type": "code", - "execution_count": 150, + "execution_count": 33, "metadata": { "collapsed": false }, @@ -26,20 +11,20 @@ "name": "stdout", "output_type": "stream", "text": [ - "8 ['Jim', 'is', 'bringing', 'his', 'bulldog', 'to', 'Walmart', '?']\n" + "10 ['Jim', 'is', 'bringing', 'his', 'bulldog', 'to', 'eat', 'at', 'Friendlys', '?']\n" ] } ], "source": [ "from nltk.tokenize import word_tokenize\n", - "sentence = \"Jim is bringing his bulldog to Walmart?\"\n", + "sentence = \"Jim is bringing his bulldog to eat at Friendlys?\"\n", "tokens = word_tokenize(sentence)\n", "print (len(tokens), tokens)" ] }, { "cell_type": "code", - "execution_count": 151, + "execution_count": 34, "metadata": { "collapsed": false }, @@ -48,7 +33,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "5 ['Jim', 'bringing', 'bulldog', 'Walmart', '?']\n" + "6 ['Jim', 'bringing', 'bulldog', 'eat', 'Friendlys', '?']\n" ] } ], @@ -61,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": 152, + "execution_count": 41, "metadata": { "collapsed": false }, @@ -73,7 +58,8 @@ "jim\n", "bring\n", "bulldog\n", - "walmart\n", + "eat\n", + "friend\n", "?\n" ] } @@ -87,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": 153, + "execution_count": 36, "metadata": { "collapsed": false }, @@ -96,7 +82,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[('Jim', 'NNP'), ('bringing', 'VBG'), ('bulldog', 'JJ'), ('Walmart', 'NNP'), ('?', '.')] Nouns: []\n" + "[('Jim', 'NNP'), ('bringing', 'VBG'), ('bulldog', 'JJ'), ('eat', 'NN'), ('Friendlys', 'NNP'), ('?', '.')] Nouns: [('eat', 'NN')]\n" ] } ], @@ -110,7 +96,7 @@ }, { "cell_type": "code", - "execution_count": 154, + "execution_count": 37, "metadata": { "collapsed": false }, @@ -119,8 +105,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "[('Jim', 'NNP', 'B-PERSON'), ('is', 'VBZ', 'O'), ('bringing', 'VBG', 'O'), ('his', 'PRP$', 'O'), ('bulldog', 'NN', 'O'), ('to', 'TO', 'O'), ('Walmart', 'NNP', 'B-PERSON'), ('?', '.', 'O')]\n", - "[]\n" + "[('Jim', 'NNP', 'B-PERSON'), ('is', 'VBZ', 'O'), ('bringing', 'VBG', 'O'), ('his', 'PRP$', 'O'), ('bulldog', 'NN', 'O'), ('to', 'TO', 'O'), ('eat', 'VB', 'O'), ('at', 'IN', 'O'), ('Friendlys', 'NNP', 'B-ORGANIZATION'), ('?', '.', 'O')]\n", + "[('Friendlys', 'NNP', 'B-ORGANIZATION')]\n" ] } ], @@ -146,7 +132,7 @@ }, { "cell_type": "code", - "execution_count": 155, + "execution_count": 38, "metadata": { "collapsed": false }, @@ -170,7 +156,7 @@ }, { "cell_type": "code", - "execution_count": 156, + "execution_count": 39, "metadata": { "collapsed": false }, @@ -179,23 +165,27 @@ "name": "stdout", "output_type": "stream", "text": [ + "0.8387096774193549\n", "0.36363636363636365\n", - "0.8387096774193549\n" + "0.13333333333333333\n" ] } ], "source": [ "w1 = wordnet.synset('bulldog.n.01')\n", + "w2 = wordnet.synset('poodle.n.01')\n", + "print(w1.wup_similarity(w2))\n", + "\n", "w2 = wordnet.synset('car.n.01')\n", "print(w1.wup_similarity(w2))\n", - "w1 = wordnet.synset('bulldog.n.01')\n", - "w2 = wordnet.synset('poodle.n.01')\n", - "print(w1.wup_similarity(w2))" + "\n", + "w2 = wordnet.synset('space.n.01')\n", + "print(w1.wup_similarity(w2))\n" ] }, { "cell_type": "code", - "execution_count": 157, + "execution_count": 40, "metadata": { "collapsed": false }, @@ -206,8 +196,8 @@ "text": [ "{'bulldog', 'English_bulldog'}\n", "set()\n", - "{'rich', 'deep', 'ample', 'full-bodied', 'robust', 'plenteous', 'rich_people', 'fertile', 'productive', 'racy', 'copious', 'plentiful', 'fat'}\n", - "{'poor_people', 'poor', 'lean'}\n" + "{'productive', 'deep', 'racy', 'copious', 'ample', 'fat', 'robust', 'plenteous', 'plentiful', 'fertile', 'rich_people', 'full-bodied', 'rich'}\n", + "{'poor', 'poor_people', 'lean'}\n" ] } ], diff --git a/Text Analytics - working with words.ipynb b/Text Analytics - working with words.ipynb index e528af6..6d7f388 100644 --- a/Text Analytics - working with words.ipynb +++ b/Text Analytics - working with words.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 174, + "execution_count": 51, "metadata": { "collapsed": false }, @@ -11,20 +11,20 @@ "name": "stdout", "output_type": "stream", "text": [ - "7 ['Jim', 'is', 'bringing', 'his', 'bulldog', 'PetSmart', '?']\n" + "10 ['Jim', 'is', 'bringing', 'his', 'bulldog', 'to', 'eat', 'at', 'Friendlys', '?']\n" ] } ], "source": [ "from nltk.tokenize import word_tokenize\n", - "sentence = \"Jim is bringing his bulldog PetSmart?\"\n", + "sentence = \"Jim is bringing his bulldog to eat at Friendlys?\"\n", "tokens = word_tokenize(sentence)\n", "print (len(tokens), tokens)" ] }, { "cell_type": "code", - "execution_count": 175, + "execution_count": 52, "metadata": { "collapsed": false }, @@ -33,7 +33,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "5 ['Jim', 'bringing', 'bulldog', 'PetSmart', '?']\n" + "6 ['Jim', 'bringing', 'bulldog', 'eat', 'Friendlys', '?']\n" ] } ], @@ -46,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": 176, + "execution_count": 53, "metadata": { "collapsed": false }, @@ -58,7 +58,8 @@ "jim\n", "bring\n", "bulldog\n", - "petsmart\n", + "eat\n", + "friend\n", "?\n" ] } @@ -72,7 +73,7 @@ }, { "cell_type": "code", - "execution_count": 177, + "execution_count": 54, "metadata": { "collapsed": false }, @@ -81,7 +82,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[('Jim', 'NNP'), ('bringing', 'VBG'), ('bulldog', 'JJ'), ('PetSmart', 'NNP'), ('?', '.')] Nouns: []\n" + "[('Jim', 'NNP'), ('bringing', 'VBG'), ('bulldog', 'JJ'), ('eat', 'NN'), ('Friendlys', 'NNP'), ('?', '.')] Nouns: [('eat', 'NN')]\n" ] } ], @@ -95,7 +96,7 @@ }, { "cell_type": "code", - "execution_count": 178, + "execution_count": 55, "metadata": { "collapsed": false }, @@ -104,8 +105,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "[('Jim', 'NNP', 'B-PERSON'), ('is', 'VBZ', 'O'), ('bringing', 'VBG', 'O'), ('his', 'PRP$', 'O'), ('bulldog', 'NN', 'O'), ('PetSmart', 'NNP', 'B-ORGANIZATION'), ('?', '.', 'O')]\n", - "[('PetSmart', 'NNP', 'B-ORGANIZATION')]\n" + "[('Jim', 'NNP', 'B-PERSON'), ('is', 'VBZ', 'O'), ('bringing', 'VBG', 'O'), ('his', 'PRP$', 'O'), ('bulldog', 'NN', 'O'), ('to', 'TO', 'O'), ('eat', 'VB', 'O'), ('at', 'IN', 'O'), ('Friendlys', 'NNP', 'B-ORGANIZATION'), ('?', '.', 'O')]\n", + "[('Friendlys', 'NNP', 'B-ORGANIZATION')]\n" ] } ], @@ -131,7 +132,7 @@ }, { "cell_type": "code", - "execution_count": 179, + "execution_count": 56, "metadata": { "collapsed": false }, @@ -155,7 +156,7 @@ }, { "cell_type": "code", - "execution_count": 180, + "execution_count": 57, "metadata": { "collapsed": false }, @@ -164,23 +165,27 @@ "name": "stdout", "output_type": "stream", "text": [ + "0.8387096774193549\n", "0.36363636363636365\n", - "0.8387096774193549\n" + "0.13333333333333333\n" ] } ], "source": [ "w1 = wordnet.synset('bulldog.n.01')\n", + "w2 = wordnet.synset('poodle.n.01')\n", + "print(w1.wup_similarity(w2))\n", + "\n", "w2 = wordnet.synset('car.n.01')\n", "print(w1.wup_similarity(w2))\n", - "w1 = wordnet.synset('bulldog.n.01')\n", - "w2 = wordnet.synset('poodle.n.01')\n", - "print(w1.wup_similarity(w2))" + "\n", + "w2 = wordnet.synset('space.n.01')\n", + "print(w1.wup_similarity(w2))\n" ] }, { "cell_type": "code", - "execution_count": 181, + "execution_count": 58, "metadata": { "collapsed": false }, @@ -191,8 +196,8 @@ "text": [ "{'bulldog', 'English_bulldog'}\n", "set()\n", - "{'rich', 'deep', 'ample', 'full-bodied', 'robust', 'plenteous', 'rich_people', 'fertile', 'productive', 'racy', 'copious', 'plentiful', 'fat'}\n", - "{'poor_people', 'poor', 'lean'}\n" + "{'productive', 'deep', 'racy', 'copious', 'ample', 'fat', 'robust', 'plenteous', 'plentiful', 'fertile', 'rich_people', 'full-bodied', 'rich'}\n", + "{'poor', 'poor_people', 'lean'}\n" ] } ],