Module 6 Assignment

A few things you should keep in mind when working on assignments:

Make sure you fill in any place that says YOUR CODE HERE. Do not write your answer anywhere other than where it says YOUR CODE HERE. Anything you write anywhere else will be removed or overwritten by the autograder.
Before you submit your assignment, make sure everything runs as expected. Go to the menubar, select Kernel, then restart the kernel and run all cells (Restart & Run All).
Do not change the title (i.e. file name) of this notebook.
Make sure that you save your work (in the menubar, select File → Save and Checkpoint).

%matplotlib inline

import numpy as np
import scipy.stats as stats

import pandas as pd
import seaborn as sns

from nose.tools import assert_almost_equal, assert_equal, assert_is_instance, assert_is_not

import matplotlib as mpl
import matplotlib.pyplot as plt

Problem 1: Calculating a Simple Probability

Write a function called "probability_calc" that takes in the number of observed heads from a coin flip experiment and the total number of coin flips, and returns the probability that the coin lands on heads.

def probability_calc(num_heads,n):
    """
    Estimate the probability of heads as the observed fraction of heads.

    Inputs
    ------
    
    num_heads: an integer, the number of heads observed
    
    n: an integer, the total number of coin flips
    
    Output
    ------
    
    prob: the probability of the coin landing on heads

    Raises
    ------

    ValueError: if n is not positive, or num_heads is not between 0 and n
    """    
    
    ### YOUR CODE HERE
    # Without at least one flip the ratio is undefined; fail with a clear
    # message instead of a bare ZeroDivisionError.
    if n <= 0:
        raise ValueError("n must be a positive number of coin flips")
    if not 0 <= num_heads <= n:
        raise ValueError("num_heads must be between 0 and n")

    # True division keeps the fractional part (e.g. 37 / 233 -> 0.1587...).
    return num_heads / n

# Sanity check: 37 heads observed in 233 flips.
my_prob = probability_calc(num_heads=37, n=233)
assert_almost_equal(my_prob, 0.15879828326180256)

Problem 2: Using Bayes Theorem

Write a function called "bayes_calc" which takes in the probability of event A and the probability of event B, and returns $P(A|B)$. Assume that A and B are independent events.

def bayes_calc(prob_A, prob_B):
    """
    Compute P(A|B) with Bayes' theorem for two independent events.

    Inputs
    -------
    
    prob_A: a float, the probability of event A
    
    prob_B: a float, the probability of event B
    
    Output
    ------
    
    prob: the probability of event A given event B

    Raises
    ------

    ValueError: if prob_B is 0, because conditioning on an impossible
    event is undefined
    
    """
    
    ### YOUR CODE HERE
    if prob_B == 0:
        raise ValueError("P(A|B) is undefined when P(B) is 0")

    # Independence means P(B|A) = P(B), so Bayes' theorem
    #   P(A|B) = P(B|A) * P(A) / P(B)
    # reduces (up to floating point rounding) to P(A).
    prob_B_given_A = prob_B
    return prob_B_given_A * prob_A / prob_B

# Sanity check: for independent events P(A|B) equals P(A) = 15 / 66.
my_prob = bayes_calc(prob_A=15 / 66, prob_B=13 / 68)
assert_almost_equal(my_prob, 0.227272, places=5)

Problem 3: Calculating a Composite Probability

Write a function called "composite_prob" which takes in a numpy array of data, called "data", a value called "cutoff" and calculates the probability that a data point in "data" is greater than "cutoff":

def composite_prob(data,cutoff):
    """
    Compute the fraction of data points strictly greater than a cutoff.

    Inputs
    -------
    
    data: a numpy array of data
    
    cutoff: a float, the cutoff value
    
    Output
    ------
    
    prob: the probability of a data point in data being larger than cutoff

    Raises
    ------

    ValueError: if data contains no points
    
    """

    ### YOUR CODE HERE
    values = np.asarray(data)
    if values.size == 0:
        raise ValueError("data must contain at least one point")

    # The mean of a boolean mask is the fraction of True entries, i.e. the
    # empirical probability of exceeding the cutoff.
    return float(np.mean(values > cutoff))

# Sanity check: 365 evenly spaced points on [0, 1] with a cutoff of 0.7.
my_prob = composite_prob(data=np.linspace(start=0, stop=1, num=365), cutoff=.7)
assert_almost_equal(my_prob, 0.301369, places=5)

# Load the Dow Jones index data set into a DataFrame and preview its first rows
df = pd.read_csv(filepath_or_buffer='./dow_jones_index.data')
df.head(n=5)

Problem 4: Poisson distribution of columns from the Dow Jones dataset

In this problem you'll finish writing the plot_poisson function. A dataframe of the dow jones dataset and the column name are passed in. Your task is to:

plot the PMF of the Poisson distribution for column col using the bar function from Axes.
- Use the mean of the column to create the Poisson distribution
- The number of points is given by n_pts
Your plot should contain a label for the x and y axis and also have a title.
Return the axes object

def plot_poisson(df, col='open', n_pts=100):
    '''
    Bar-plot the PMF of a Poisson distribution whose mean is the mean of a column.

    df: dataframe containing data from dow jones index
    col: column name in df variable
    n_pts: number of points; the PMF is evaluated at the integers 0 .. n_pts - 1
    returns axes object
    '''
    # Define plot layout
    fig, ax = plt.subplots(figsize=(10, 5))
    
    ### YOUR CODE HERE
    values = df[col]
    if not pd.api.types.is_numeric_dtype(values):
        # NOTE(review): the raw Dow Jones file may store prices as text such
        # as "$15.82" -- confirm against the data file. Strip the currency
        # symbol so the mean can be computed either way.
        values = pd.to_numeric(values.astype(str).str.lstrip('$'))
    mu = values.mean()

    # A Poisson variable only takes non-negative integer values, so evaluate
    # the PMF on an integer grid of n_pts points.
    x = np.arange(n_pts)
    ax.bar(x, stats.poisson.pmf(x, mu))

    ax.set_xlabel('Number of events')
    ax.set_ylabel('Probability')
    ax.set_title("Poisson PMF using the mean of column '{}'".format(col))
    return ax

# Draw the student's plot for the 'open' column.
ch = plot_poisson(df,'open')

from helper import pp
print("Your Plot should look similar to this:")
sol = pp(df)

# assert_is_not tests object identity, which only works against literals such
# as 0 and '' through interpreter caching; compare by value instead.
from nose.tools import assert_not_equal

assert_is_instance(ch, mpl.axes.Axes, msg='Return a Axes object.')
assert_not_equal(len(ch.title.get_text()), 0, msg="Your plot doesn't have a title.")
assert_not_equal(ch.xaxis.get_label_text(), '', msg="Change the x-axis label to something more descriptive.")
assert_not_equal(ch.yaxis.get_label_text(), '', msg="Change the y-axis label to something more descriptive.")
assert_equal(len(ch.patches), len(sol.patches), msg="Your bar graph doesn't have the correct number patches. Make sure you use the n_pts parameter. ")
for student_patch, solution_patch in zip(ch.patches, sol.patches):
    # Bar sizes are floats: allow for rounding differences instead of
    # requiring bit-for-bit equality with the reference plot.
    assert_almost_equal(student_patch.get_width(), solution_patch.get_width())
    assert_almost_equal(student_patch.get_height(), solution_patch.get_height())

Problem 5: Extending Problem 4 to work with the CDF and SF

In this problem you'll finish writing the plot function. A dataframe of the Dow Jones dataset, a string containing 'pmf', 'cdf', or 'sf', and the column name are passed in. Your task is to:

plot the appropriate function of the Poisson distribution (PMF, CDF, or SF), which is given by dist. (Use the bar function from Axes.)
Use the mean of the column to create the Poisson distribution
Your plot should contain a label for the x and y axis and also have a title.
Return the Axes object

def plot(df, dist, col='open', n_pts=100):
    '''
    Bar-plot the PMF, CDF, or SF of a Poisson distribution whose mean is the
    mean of a column.

    df: dataframe containing data from dow jones index
    dist: string specifying function to use. This contains: 'pmf', 'cdf', or 'sf'
    col: column name in df variable
    n_pts: number of points; the function is evaluated at the integers 0 .. n_pts - 1
    returns axes object
    raises ValueError if dist is not one of 'pmf', 'cdf', or 'sf'
    '''
    # Reject unknown names before creating a figure so a bad call leaves
    # no empty plot behind.
    if dist not in ('pmf', 'cdf', 'sf'):
        raise ValueError("dist must be one of 'pmf', 'cdf', or 'sf'")

    # Define plot layout
    fig, ax = plt.subplots(figsize=(10, 5))
    
    ### YOUR CODE HERE
    values = df[col]
    if not pd.api.types.is_numeric_dtype(values):
        # NOTE(review): the raw Dow Jones file may store prices as text such
        # as "$15.82" -- confirm against the data file. Strip the currency
        # symbol so the mean can be computed either way.
        values = pd.to_numeric(values.astype(str).str.lstrip('$'))
    mu = values.mean()

    # Look up the requested method (pmf, cdf, or sf) on the Poisson
    # distribution and evaluate it on an integer grid of n_pts points.
    x = np.arange(n_pts)
    y = getattr(stats.poisson, dist)(x, mu)
    ax.bar(x, y)

    ax.set_xlabel('Number of events')
    ax.set_ylabel('Probability')
    ax.set_title("Poisson {} using the mean of column '{}'".format(dist.upper(), col))
    return ax

# Draw the student's PMF plot for the 'open' column.
ax_pmf = plot(df, 'pmf', col='open')

from helper import p
sol1 = p(df,'pmf', col='open')

# assert_is_not tests object identity, which only works against literals such
# as 0 and '' through interpreter caching; compare by value instead.
from nose.tools import assert_not_equal

assert_is_instance(ax_pmf, mpl.axes.Axes, msg='Return a Axes object.')
assert_not_equal(len(ax_pmf.title.get_text()), 0, msg="Your plot doesn't have a title.")
assert_not_equal(ax_pmf.xaxis.get_label_text(), '', msg="Change the x-axis label to something more descriptive.")
assert_not_equal(ax_pmf.yaxis.get_label_text(), '', msg="Change the y-axis label to something more descriptive.")
assert_equal(len(ax_pmf.patches), len(sol1.patches), msg="Your bar graph doesn't have the correct number patches. Make sure you use the n_pts parameter. ")
for student_patch, solution_patch in zip(ax_pmf.patches, sol1.patches):
    # Bar sizes are floats: allow for rounding differences instead of
    # requiring bit-for-bit equality with the reference plot.
    assert_almost_equal(student_patch.get_width(), solution_patch.get_width())
    assert_almost_equal(student_patch.get_height(), solution_patch.get_height())

# Draw the student's CDF plot for the 'close' column.
ax_cmf = plot(df, 'cdf', 'close')

print("Your plot using CMF should look like this:")
sol2 = p(df,'cdf', col='close')

# assert_is_not tests object identity, which only works against literals such
# as 0 and '' through interpreter caching; compare by value instead.
from nose.tools import assert_not_equal

assert_is_instance(ax_cmf, mpl.axes.Axes, msg='Return a Axes object.')
assert_not_equal(len(ax_cmf.title.get_text()), 0, msg="Your plot doesn't have a title.")
assert_not_equal(ax_cmf.xaxis.get_label_text(), '', msg="Change the x-axis label to something more descriptive.")
assert_not_equal(ax_cmf.yaxis.get_label_text(), '', msg="Change the y-axis label to something more descriptive.")

assert_equal(len(ax_cmf.patches), len(sol2.patches), msg="Your bar graph doesn't have the correct number patches. Make sure you use the n_pts parameter. ")
for student_patch, solution_patch in zip(ax_cmf.patches, sol2.patches):
    # Bar sizes are floats: allow for rounding differences instead of
    # requiring bit-for-bit equality with the reference plot.
    assert_almost_equal(student_patch.get_width(), solution_patch.get_width())
    assert_almost_equal(student_patch.get_height(), solution_patch.get_height())

# Draw the student's survival-function plot for the 'close' column.
ax_sf = plot(df, 'sf', 'close')

print("Your Plot should look similar to this:")
sol3 = p(df,'sf', col='close')

# assert_is_not tests object identity, which only works against literals such
# as 0 and '' through interpreter caching; compare by value instead.
from nose.tools import assert_not_equal

assert_is_instance(ax_sf, mpl.axes.Axes, msg='Return a Axes object.')
assert_not_equal(len(ax_sf.title.get_text()), 0, msg="Your plot doesn't have a title.")
assert_not_equal(ax_sf.xaxis.get_label_text(), '', msg="Change the x-axis label to something more descriptive.")
assert_not_equal(ax_sf.yaxis.get_label_text(), '', msg="Change the y-axis label to something more descriptive.")

assert_equal(len(ax_sf.patches), len(sol3.patches), msg="Your bar graph doesn't have the correct number patches. Make sure you use the n_pts parameter. ")
for student_patch, solution_patch in zip(ax_sf.patches, sol3.patches):
    # Bar sizes are floats: allow for rounding differences instead of
    # requiring bit-for-bit equality with the reference plot.
    assert_almost_equal(student_patch.get_width(), solution_patch.get_width())
    assert_almost_equal(student_patch.get_height(), solution_patch.get_height())

This notebook is released under the Creative Commons license CC BY-NC-SA 4.0. Any reproduction, adaptation, distribution, dissemination or making available of this notebook for commercial use is not allowed unless authorized in writing by the copyright holder.