diff --git a/mmap_utils.py b/mmap_utils.py index 8a6e5a9..c3a4cf7 100644 --- a/mmap_utils.py +++ b/mmap_utils.py @@ -1,5 +1,5 @@ import os -from IPython.parallel import interactive +from ipyparallel import interactive @interactive diff --git a/model_selection.py b/model_selection.py index e56a04c..417667a 100644 --- a/model_selection.py +++ b/model_selection.py @@ -6,8 +6,8 @@ from collections import namedtuple import os -from IPython.parallel import interactive -from IPython.parallel import TaskAborted +from ipyparallel import interactive +from ipyparallel import TaskAborted from scipy.stats import sem import numpy as np @@ -203,7 +203,7 @@ def __repr__(self): def boxplot_parameters(self, display_train=False): """Plot boxplot for each parameters independently""" - import pylab as pl + import matplotlib.pyplot as pl results = [Evaluation(*task.get()) for task_group in self.task_groups for task in task_group diff --git a/notebooks/00 - Tutorial Setup .ipynb b/notebooks/00 - Tutorial Setup .ipynb index 2746f71..07c6936 100644 --- a/notebooks/00 - Tutorial Setup .ipynb +++ b/notebooks/00 - Tutorial Setup .ipynb @@ -1,779 +1,830 @@ { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tutorial Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check your install" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import numpy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import scipy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import sklearn" + ] + }, + { + "cell_type": "code", 
+ "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import psutil" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import ipyparallel" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finding the location of an installed package and its version:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "numpy.__path__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "numpy.__version__" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check that you have the datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "%run ../fetch_data.py\n", + "# %run ../fetch_data.py twenty_newsgroups sentiment140 covertype" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import os\n", + "for fname in os.listdir('../datasets/'):\n", + " print(fname)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## A NumPy primer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### NumPy array dtypes and shapes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a = np.array([1, 2, 3])" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "b = np.array([[0, 2, 4], [1, 3, 5]])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "b" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "b.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "b.dtype" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a.dtype" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.zeros(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.ones(shape=(3, 4), dtype=np.int32)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Common array operations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "c = b * 0.5" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "c" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "c.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + 
"collapsed": false + }, + "outputs": [], + "source": [ + "c.dtype" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "d = a + c" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "d" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "d[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "d[0, 0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "d[:, 0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "d.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "d.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "d.sum(axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "d.mean(axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Reshaping and inplace update" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "e = np.arange(12)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "e" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "f = e.reshape(3, 4)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "f" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "e" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "e[5:] = 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "e" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "f" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Combining arrays" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "b" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "d" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.concatenate([a, a, a])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.vstack([a, b, d])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "np.hstack([b, d])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## A Matplotlib primer" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "x = np.linspace(0, 2, 10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "x" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.plot(x, 'o-');" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.plot(x, x, 'o-', label='linear')\n", + "plt.plot(x, x ** 2, 'x-', label='quadratic')\n", + "\n", + "plt.legend(loc='best')\n", + "plt.title('Linear vs Quadratic progression')\n", + "plt.xlabel('Input')\n", + "plt.ylabel('Output');" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "samples = np.random.normal(loc=1.0, scale=0.5, size=1000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "samples.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "samples.dtype" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "samples[:30]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.hist(samples, bins=50);" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "samples_1 = np.random.normal(loc=1, scale=.5, size=10000)\n", + "samples_2 = np.random.standard_t(df=10, size=10000)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "bins = np.linspace(-3, 3, 50)\n", + "_ = plt.hist(samples_1, bins=bins, alpha=0.5, label='samples 1')\n", + "_ = plt.hist(samples_2, bins=bins, alpha=0.5, label='samples 2')\n", + "plt.legend(loc='upper left');" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "plt.scatter(samples_1, samples_2, alpha=0.1);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 2", "language": "python", - "name": "python3" + "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 3 + "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.0" - }, - "name": "" - }, - "nbformat": 3, - "nbformat_minor": 0, - "worksheets": [ - { - "cells": [ - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "Tutorial Setup" - ] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Check your install" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import numpy" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import scipy" - ], - "language": 
"python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import matplotlib" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import sklearn" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import psutil" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import pandas" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import IPython.parallel" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finding the location of an installed package and its version:" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "numpy.__path__" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "numpy.__version__" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Check that you have the datasets" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "%run ../fetch_data.py\n", - "# %run ../fetch_data.py twenty_newsgroups sentiment140 covertype" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import os\n", - "for fname in os.listdir('../datasets/'):\n", - " print(fname)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "A NumPy primer" - ] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "NumPy 
array dtypes and shapes" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import numpy as np" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "a = np.array([1, 2, 3])" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "a" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "b = np.array([[0, 2, 4], [1, 3, 5]])" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "b" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "b.shape" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "b.dtype" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "a.shape" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "a.dtype" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "np.zeros(5)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "np.ones(shape=(3, 4), dtype=np.int32)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Common array operations" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "c = b * 0.5" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - 
"c" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "c.shape" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "c.dtype" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "a" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "d = a + c" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "d" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "d[0]" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "d[0, 0]" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "d[:, 0]" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "d.sum()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "d.mean()" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "d.sum(axis=0)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "d.mean(axis=1)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Reshaping and inplace update" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "e = np.arange(12)" - ], - "language": 
"python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "e" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "f = e.reshape(3, 4)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "f" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "e" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "e[5:] = 0" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "e" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "f" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Combining arrays" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "a" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "b" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "d" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "np.concatenate([a, a, a])" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "np.vstack([a, b, d])" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "np.hstack([b, d])" - ], - "language": "python", - "metadata": {}, - "outputs": 
[] - }, - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "A Matplotlib primer" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "%matplotlib inline" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import matplotlib.pyplot as plt" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "x = np.linspace(0, 2, 10)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "x" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "plt.plot(x, 'o-');" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "plt.plot(x, x, 'o-', label='linear')\n", - "plt.plot(x, x ** 2, 'x-', label='quadratic')\n", - "\n", - "plt.legend(loc='best')\n", - "plt.title('Linear vs Quadratic progression')\n", - "plt.xlabel('Input')\n", - "plt.ylabel('Output');" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "samples = np.random.normal(loc=1.0, scale=0.5, size=1000)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "samples.shape" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "samples.dtype" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "samples[:30]" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "plt.hist(samples, bins=50);" - ], - 
"language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "samples_1 = np.random.normal(loc=1, scale=.5, size=10000)\n", - "samples_2 = np.random.standard_t(df=10, size=10000)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "bins = np.linspace(-3, 3, 50)\n", - "_ = plt.hist(samples_1, bins=bins, alpha=0.5, label='samples 1')\n", - "_ = plt.hist(samples_2, bins=bins, alpha=0.5, label='samples 2')\n", - "plt.legend(loc='upper left');" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "plt.scatter(samples_1, samples_2, alpha=0.1);" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": true, - "input": [], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": true, - "input": [], - "language": "python", - "metadata": {}, - "outputs": [] - } - ], - "metadata": {} + "pygments_lexer": "ipython2", + "version": "2.7.10" } - ] + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/notebooks/06 - Distributed Model Selection and Assessment.ipynb b/notebooks/06 - Distributed Model Selection and Assessment.ipynb index 4565d48..e5ad0cc 100644 --- a/notebooks/06 - Distributed Model Selection and Assessment.ipynb +++ b/notebooks/06 - Distributed Model Selection and Assessment.ipynb @@ -38,7 +38,7 @@ "source": [ "Outline of the session:\n", "\n", - "- Introduction to **IPython.parallel**\n", + "- Introduction to **ipyparallel**\n", "- Sharing Data Between Processes with **Memory Mapping**\n", "- **Parallel Grid Search** and Model Selection\n", "- **Parallel** Computation of **Learning Curves** (TODO)\n", @@ -70,7 +70,7 @@ "level": 2, "metadata": {}, "source": [ - "IPython.parallel, a Primer" + "ipyparallel, a Primer" ] }, { @@ -79,11 +79,11 @@ 
"source": [ "This section gives a primer on some tools best utilizing computational resources when doing predictive modeling in the Python / NumPy ecosystem namely:\n", "\n", - "- optimal usage of available CPUs and cluster nodes with **`IPython.parallel`**\n", + "- optimal usage of available CPUs and cluster nodes with **`ipyparallel`**\n", "\n", "- optimal memory re-use using shared memory between Python processes using **`numpy.memmap`** and **`joblib`**\n", "\n", - "### What is so great about `IPython.parallel`:\n", + "### What is so great about `ipyparallel`:\n", "\n", "- Single node multi-CPUs\n", "- Multiple node multi-CPUs\n", @@ -127,7 +127,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from IPython.parallel import Client\n", + "from ipyparallel import Client\n", "client = Client()" ], "language": "python", @@ -771,7 +771,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from IPython.parallel import Client\n", + "from ipyparallel import Client\n", "client = Client()\n", "len(client)" ], @@ -1406,7 +1406,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let's leverage IPython.parallel and the Memory Mapping features of joblib to write a custom grid search utility that runs on cluster in a memory efficient manner.\n", + "Let's leverage ipyparallel and the Memory Mapping features of joblib to write a custom grid search utility that runs on cluster in a memory efficient manner.\n", "\n", "Assume that we want to reproduce the grid search from the previous session:" ] @@ -1512,7 +1512,7 @@ "collapsed": false, "input": [ "from sklearn.svm import SVC\n", - "from IPython.parallel import Client\n", + "from ipyparallel import Client\n", "\n", "client = Client()\n", "lb_view = client.load_balanced_view()\n", @@ -1844,7 +1844,7 @@ " setup_class = starcluster.plugins.pypkginstaller.PyPkgInstaller\n", " packages = scikit-learn, psutil\n", " \n", - " # Base configuration for IPython.parallel cluster\n", + " # Base configuration for 
ipyparallel cluster\n", " [cluster iptemplate]\n", " KEYNAME = mykey\n", " CLUSTER_SIZE = 1\n", @@ -1917,7 +1917,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "When using the `LoadBalancedView` API of `IPython.parallel.Client` is it possible to dynamically grow the cluster to shorten the duration of the processing of a queue of task without having to restart from scratch.\n", + "When using the `LoadBalancedView` API of `ipyparallel.Client` it is possible to dynamically grow the cluster to shorten the duration of the processing of a queue of tasks without having to restart from scratch.\n", "\n", "This can be achieved using the `addnode` command, for instance to add 3 more nodes using $0.50 bid price on the Spot Instance market:\n", " \n", @@ -1925,7 +1925,7 @@ " \n", "Each node will automatically run the `IPCluster` plugin and register new `IPEngine` processes to the existing `IPController` process running on master.\n", "\n", - "It is also possible to terminate individual running nodes of the cluster with `removenode` command but this will kill any task running on that node and IPython.parallel will **not** restart the failed task automatically." + "It is also possible to terminate individual running nodes of the cluster with `removenode` command but this will kill any task running on that node and ipyparallel will **not** restart the failed task automatically." 
] }, { diff --git a/notebooks/08 - Large Scale Text Classification for Sentiment Analysis.ipynb b/notebooks/08 - Large Scale Text Classification for Sentiment Analysis.ipynb index 38bb164..49a4cb3 100644 --- a/notebooks/08 - Large Scale Text Classification for Sentiment Analysis.ipynb +++ b/notebooks/08 - Large Scale Text Classification for Sentiment Analysis.ipynb @@ -847,14 +847,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let's use IPython parallel to read partitions of the train CSV in different Python processes using the interactive IPython.parallel interface:" + "Let's use ipyparallel to read partitions of the train CSV in different Python processes using the interactive ipyparallel interface:" ] }, { "cell_type": "code", "collapsed": false, "input": [ - "from IPython.parallel import Client\n", + "from ipyparallel import Client\n", "\n", "client = Client()\n", "len(client)" @@ -958,7 +958,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We are now ready to read the data partition from the CSV file, vectorize it, and train an indepenent model on each IPython.parallel engine:" + "We are now ready to read the data partition from the CSV file, vectorize it, and train an independent model on each ipyparallel engine:" ] }, { diff --git a/rendered_notebooks/00 - Tutorial Setup .ipynb b/rendered_notebooks/00 - Tutorial Setup .ipynb index f4d905e..57fcf26 100644 --- a/rendered_notebooks/00 - Tutorial Setup .ipynb +++ b/rendered_notebooks/00 - Tutorial Setup .ipynb @@ -110,7 +110,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "import IPython.parallel" + "import ipyparallel" ], "language": "python", "metadata": {}, diff --git a/rendered_notebooks/06 - Distributed Model Selection and Assessment.ipynb b/rendered_notebooks/06 - Distributed Model Selection and Assessment.ipynb index d81e454..69ba583 100644 --- a/rendered_notebooks/06 - Distributed Model Selection and Assessment.ipynb +++ b/rendered_notebooks/06 - Distributed Model 
Selection and Assessment.ipynb @@ -38,7 +38,7 @@ "source": [ "Outline of the session:\n", "\n", - "- Introduction to **IPython.parallel**\n", + "- Introduction to **ipyparallel**\n", "- Sharing Data Between Processes with **Memory Mapping**\n", "- **Parallel Grid Search** and Model Selection\n", "- **Parallel** Computation of **Learning Curves** (TODO)\n", @@ -70,7 +70,7 @@ "level": 2, "metadata": {}, "source": [ - "IPython.parallel, a Primer" + "ipyparallel, a Primer" ] }, { @@ -79,11 +79,11 @@ "source": [ "This section gives a primer on some tools best utilizing computational resources when doing predictive modeling in the Python / NumPy ecosystem namely:\n", "\n", - "- optimal usage of available CPUs and cluster nodes with **`IPython.parallel`**\n", + "- optimal usage of available CPUs and cluster nodes with **`ipyparallel`**\n", "\n", "- optimal memory re-use using shared memory between Python processes using **`numpy.memmap`** and **`joblib`**\n", "\n", - "### What is so great about `IPython.parallel`:\n", + "### What is so great about `ipyparallel`:\n", "\n", "- Single node multi-CPUs\n", "- Multiple node multi-CPUs\n", @@ -137,7 +137,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from IPython.parallel import Client\n", + "from ipyparallel import Client\n", "client = Client()" ], "language": "python", @@ -1091,7 +1091,7 @@ "cell_type": "code", "collapsed": false, "input": [ - "from IPython.parallel import Client\n", + "from ipyparallel import Client\n", "client = Client()\n", "len(client)" ], @@ -2105,7 +2105,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let's leverage IPython.parallel and the Memory Mapping features of joblib to write a custom grid search utility that runs on cluster in a memory efficient manner.\n", + "Let's leverage ipyparallel and the Memory Mapping features of joblib to write a custom grid search utility that runs on cluster in a memory efficient manner.\n", "\n", "Assume that we want to reproduce the grid 
search from the previous session:" ] @@ -2252,7 +2252,7 @@ "collapsed": false, "input": [ "from sklearn.svm import SVC\n", - "from IPython.parallel import Client\n", + "from ipyparallel import Client\n", "\n", "client = Client()\n", "lb_view = client.load_balanced_view()\n", @@ -2708,7 +2708,7 @@ " setup_class = starcluster.plugins.pypkginstaller.PyPkgInstaller\n", " packages = scikit-learn, psutil\n", " \n", - " # Base configuration for IPython.parallel cluster\n", + " # Base configuration for ipyparallel cluster\n", " [cluster iptemplate]\n", " KEYNAME = mykey\n", " CLUSTER_SIZE = 1\n", @@ -2781,7 +2781,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "When using the `LoadBalancedView` API of `IPython.parallel.Client` is it possible to dynamically grow the cluster to shorten the duration of the processing of a queue of task without having to restart from scratch.\n", + "When using the `LoadBalancedView` API of `ipyparallel.Client` is it possible to dynamically grow the cluster to shorten the duration of the processing of a queue of task without having to restart from scratch.\n", "\n", "This can be achieved using the `addnode` command, for instance to add 3 more nodes using $0.50 bid price on the Spot Instance market:\n", " \n", @@ -2789,7 +2789,7 @@ " \n", "Each node will automatically run the `IPCluster` plugin and register new `IPEngine` processes to the existing `IPController` process running on master.\n", "\n", - "It is also possible to terminate individual running nodes of the cluster with `removenode` command but this will kill any task running on that node and IPython.parallel will **not** restart the failed task automatically." + "It is also possible to terminate individual running nodes of the cluster with `removenode` command but this will kill any task running on that node and ipyparallel will **not** restart the failed task automatically." 
] }, { diff --git a/rendered_notebooks/08 - Large Scale Text Classification for Sentiment Analysis.ipynb b/rendered_notebooks/08 - Large Scale Text Classification for Sentiment Analysis.ipynb index 544c6d4..37581c9 100644 --- a/rendered_notebooks/08 - Large Scale Text Classification for Sentiment Analysis.ipynb +++ b/rendered_notebooks/08 - Large Scale Text Classification for Sentiment Analysis.ipynb @@ -1370,14 +1370,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let's use IPython parallel to read partitions of the train CSV in different Python processes using the interactive IPython.parallel interface:" + "Let's use ipyparallel to read partitions of the train CSV in different Python processes using the interactive ipyparallel interface:" ] }, { "cell_type": "code", "collapsed": false, "input": [ - "from IPython.parallel import Client\n", + "from ipyparallel import Client\n", "\n", "client = Client()\n", "len(client)" @@ -1526,7 +1526,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We are now ready to read the data partition from the CSV file, vectorize it, and train an indepenent model on each IPython.parallel engine:" + "We are now ready to read the data partition from the CSV file, vectorize it, and train an indepenent model on each ipyparallel engine:" ] }, {