coding-blocks-archives
diff --git a/‎.gitignore
Lines changed: 2 additions & 0 deletions b/‎.gitignore
Lines changed: 2 additions & 0 deletions
diff --git a/‎class_12/.ipynb_checkpoints/CNN-checkpoint.ipynb
Lines changed: 370 additions & 0 deletions b/‎class_12/.ipynb_checkpoints/CNN-checkpoint.ipynb
Lines changed: 370 additions & 0 deletions
diff --git a/‎class_12/.ipynb_checkpoints/Convolutions-checkpoint.ipynb
Lines changed: 105 additions & 3 deletions b/‎class_12/.ipynb_checkpoints/Convolutions-checkpoint.ipynb
Lines changed: 105 additions & 3 deletions
diff --git a/‎class_12/.ipynb_checkpoints/w2v_dataprocess-checkpoint.ipynb
Lines changed: 233 additions & 0 deletions b/‎class_12/.ipynb_checkpoints/w2v_dataprocess-checkpoint.ipynb
Lines changed: 233 additions & 0 deletions
diff --git a/‎class_12/CNN.ipynb
Lines changed: 381 additions & 0 deletions b/‎class_12/CNN.ipynb
Lines changed: 381 additions & 0 deletions
diff --git a/‎class_12/Convolutions.ipynb
Lines changed: 150 additions & 3 deletions b/‎class_12/Convolutions.ipynb
Lines changed: 150 additions & 3 deletions
@@ -1,2 +1,4 @@
 class_03/data/*
 *.pyc
+
+*.npy
@@ -0,0 +1,233 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from matplotlib import pyplot as plt\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "f = open('./data.txt')\n",
+    "d = f.read()\n",
+    "f.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 102,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "data = d[1260:]\n",
+    "data = data.lower().decode('utf-8')\n",
+    "import re"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 99,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "p = re.sub('[^A-Za-z]+', ' ', data)\n",
+    "ds = p.split()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 100,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "u = np.unique(ds, return_counts=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 145,
+   "metadata": {
+    "collapsed": false,
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "bow = {}\n",
+    "rev_bow = {}\n",
+    "i = 0\n",
+    "for ix in range(len(u[0])):\n",
+    "    if u[1][ix] > 2:\n",
+    "        bow[i] = u[0][ix]\n",
+    "        rev_bow[u[0][ix]] = i\n",
+    "        i += 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 146,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1781"
+      ]
+     },
+     "execution_count": 146,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(bow)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 147,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def get_one_hot_vector(word):\n",
+    "    vec = np.zeros((len(bow),))\n",
+    "    vec[rev_bow[word]] = 1.0\n",
+    "    \n",
+    "    return vec\n",
+    "\n",
+    "def get_word_from_vec(vec):\n",
+    "    ind = np.argmax(vec)\n",
+    "    \n",
+    "    return bow[ind]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 148,
+   "metadata": {
+    "collapsed": false,
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tree\n"
+     ]
+    }
+   ],
+   "source": [
+    "a = get_one_hot_vector('tree')\n",
+    "a_ = get_word_from_vec(a)\n",
+    "\n",
+    "print a_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 156,
+   "metadata": {
+    "collapsed": false,
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "all_data = p.split()\n",
+    "len(all_data)\n",
+    "\n",
+    "dataset = []#np.zeros((len(all_data), len(bow)))\n",
+    "# print dataset.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 157,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "for w in range(len(all_data)):\n",
+    "    try:\n",
+    "        dataset.append(get_one_hot_vector(all_data[w]))\n",
+    "    except:\n",
+    "        pass"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 158,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(35456, 1781)\n"
+     ]
+    }
+   ],
+   "source": [
+    "dataset = np.asarray(dataset)\n",
+    "print dataset.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 159,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "np.save('all_word_data', dataset)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
-Original file line number
+Diff line change
@@ @@ -1,2 +1,4 @@ @@
 class_03/data/*
 *.pyc
++
 +*.npy