A few things you should keep in mind when working on assignments:
Make sure you fill in any place that says YOUR CODE HERE. Do not write your answer anywhere other than where it says YOUR CODE HERE. Anything you write anywhere else will be removed or overwritten by the autograder.
Before you submit your assignment, make sure everything runs as expected. In the menubar, select Kernel, then restart the kernel and run all cells (Restart & Run All).
Do not change the title (i.e. file name) of this notebook.
Make sure that you save your work (in the menubar, select File → Save and Checkpoint).
import csv
from nose.tools import assert_equal, assert_almost_equal
import pandas as pd
In this problem you will finish writing a function called read_data that reads the iris dataset. The columns are in this order: sepal length, petal length, sepal width, petal width, class.
There are multiple ways to solve this problem but ultimately this function should:
def read_data(file_path):
    '''
    This function reads data from the iris data and returns 5 lists (1 for each column)

    The first non-blank row is treated as a header and skipped when its
    measurement fields are not numeric. Blank rows are ignored.

    file_path: string that specifies path to iris dataset

    Returns (sepal_len, petal_len, sepal_wid, petal_wid, class_), i.e. the
    columns in file order. The four measurement lists hold floats; class_
    holds the label strings unchanged.

    Raises ValueError if a row other than the header has a non-numeric
    measurement, and IndexError if a row has fewer than 5 fields.
    '''
    ### YOUR CODE HERE
    sepal_len, petal_len, sepal_wid, petal_wid, class_ = [], [], [], [], []
    measurement_columns = (sepal_len, petal_len, sepal_wid, petal_wid)

    # newline='' lets the csv module do its own line-ending handling.
    with open(file_path, 'r', newline='') as f:
        may_be_header = True
        for row in csv.reader(f):
            if not row:
                continue
            try:
                measurements = [float(field) for field in row[:4]]
            except ValueError:
                if may_be_header:
                    # Column titles, not data: skip this one row only.
                    may_be_header = False
                    continue
                raise
            may_be_header = False
            # Read the label before appending anything so a short row cannot
            # leave the five lists with different lengths.
            label = row[4]
            for column, value in zip(measurement_columns, measurements):
                column.append(value)
            class_.append(label)

    return sepal_len, petal_len, sepal_wid, petal_wid, class_
# Load the five columns from the course copy of the iris dataset.
file_path = './iris.csv'
sepal_len, petal_len, sepal_wid, petal_wid, class_ = read_data(file_path)
# Each check below compares a slice of one returned list against known values.
# The numeric columns are compared against float literals, so read_data must
# return numbers, not strings.
# First column: sepal length.
assert_equal(sepal_len[0:50], [5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4,
4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1,
4.6, 5.1, 4.8, 5.0, 5.0, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2,
5.5, 4.9, 5.0, 5.5, 4.9, 4.4, 5.1, 5.0, 4.5, 4.4, 5.0,
5.1, 4.8, 5.1, 4.6, 5.3, 5.0])
assert_equal(sepal_len[75:100], [6.6, 6.8, 6.7, 6.0, 5.7, 5.5, 5.5, 5.8, 6.0, 5.4, 6.0,
6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5.0, 5.6, 5.7, 5.7,
6.2, 5.1, 5.7])
# Second column: petal length.
assert_equal(petal_len[0:25], [1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5,
1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5,
1.0, 1.7, 1.9])
assert_equal(petal_len[44:67], [1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4.0, 4.6,
4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4.0, 4.7, 3.6, 4.4, 4.5])
# Third column: sepal width.
assert_equal(sepal_wid[0:33], [3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7,
3.4, 3.0, 3.0, 4.0, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7,
3.6, 3.3, 3.4, 3.0, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1])
assert_equal(sepal_wid[44:100], [3.8, 3.0, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8,
2.8, 3.3, 2.4, 2.9, 2.7, 2.0, 3.0, 2.2, 2.9, 2.9, 3.1,
3.0, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3.0, 2.8,
3.0, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3.0, 3.4, 3.1, 2.3,
3.0, 2.5, 2.6, 3.0, 2.6, 2.3, 2.7, 3.0, 2.9, 2.9, 2.5, 2.8])
# Fourth column: petal width.
assert_equal(petal_wid[15:40], [0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4,
0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.1, 0.2, 0.2, 0.1, 0.2,
0.2])
assert_equal(petal_wid[45:78], [0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6,
1.0, 1.3, 1.4, 1.0, 1.5, 1.0, 1.4, 1.3, 1.4, 1.5, 1.0, 1.5,
1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7])
# Fifth column: class label, compared as strings.
assert_equal(class_[45:55], ['Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
'Iris-setosa', 'Iris-versicolor', 'Iris-versicolor',
'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor'])
assert_equal(class_[75:90], ['Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor'])
Recall that the column names of the data frame, df, can all be found at once using "df.columns".
In this problem you will use df.columns to find the names of the first two columns of df, and save them in a variable named "firsttwo_column_names".
Hint: Treat df.columns like an array or a list.
# Load the iris dataset into a DataFrame.
df = pd.read_csv('./iris.csv')
### YOUR CODE HERE
# Slicing df.columns yields another Index, which the type check below requires
# (converting to a list would fail it).
firsttwo_column_names = df.columns[:2]
# pd.Index is the public name of the index class. The original check used
# pd.indexes.base.Index, a module path that current pandas no longer exposes.
assert_equal(type(firsttwo_column_names), pd.Index)
assert_equal(len(firsttwo_column_names), 2)
Now that we have read data from the iris dataset, we will write a subset of it to another file. The following function, write_file, takes a file_path to write to, all 5 columns from the iris dataset, and the number of rows to write to the new file.
For this problem do the following:
def write_file(file_path, sepal_len, petal_len, sepal_wid, petal_wid, class_, amount=25):
    '''
    This function writes up to `amount` rows using the 5 columns of the iris data
    to file named file_path

    Each output row holds one value from each column, in the order the columns
    are passed in. No header row is written. Fewer than `amount` rows are
    written when the columns are shorter than `amount`; a negative `amount`
    writes nothing. An existing file at file_path is overwritten.

    file_path : string path to write iris dataset to
    sepal_len : sepal length in cm column in iris dataset
    petal_len : petal length in cm column in iris dataset
    sepal_wid : sepal width in cm column in iris dataset
    petal_wid : petal width in cm column in iris dataset
    class_ : class column in iris dataset
    amount: Amount of rows to write to file_path
    '''
    ### YOUR CODE HERE
    # zip stops at the shortest column, so ragged input cannot produce a
    # partially filled row.
    rows = list(zip(sepal_len, petal_len, sepal_wid, petal_wid, class_))
    # Clamp at zero: a negative slice bound would mean "all but the last n".
    limit = max(amount, 0)

    # newline='' stops the text layer from altering the csv module's own
    # line terminators.
    with open(file_path, 'w', newline='') as f:
        csv.writer(f).writerows(rows[:limit])
# Write the first 25 rows to a new file, then read that file back to check it.
iris25rows_path = './iris25.csv'
write_file(iris25rows_path, sepal_len, petal_len, sepal_wid, petal_wid, class_, amount=25)
column3 = []
column5 = []
# The loop must be nested inside the `with` so the file is still open while
# csv.reader pulls rows from it.
with open(iris25rows_path, 'r') as f:
    for row in csv.reader(f):
        column3.append(row[2])  # third field: sepal width
        column5.append(row[4])  # fifth field: class label
# csv.reader yields every field as text, so the expected values are strings.
assert_equal(column3, ['3.5', '3.0', '3.2', '3.1', '3.6', '3.9', '3.4', '3.4',
                       '2.9', '3.1', '3.7', '3.4', '3.0', '3.0', '4.0', '4.4',
                       '3.9', '3.5', '3.8', '3.8', '3.4', '3.7', '3.6', '3.3', '3.4'])
assert_equal(column5, ['Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
                       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
                       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
                       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
                       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
                       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
                       'Iris-setosa'])
# NOTE: despite the "99" in the variable and file name, 100 rows are requested
# here and the assertion that follows expects exactly 100 rows.
iris99rows_path = './iris99.csv'
write_file(iris99rows_path, sepal_len, petal_len, sepal_wid, petal_wid, class_, amount=100)
data = []
# Read every row back as a list of strings; the loop must sit inside the
# `with` so the file is still open while csv.reader consumes it.
with open(iris99rows_path, 'r') as f:
    for row in csv.reader(f):
        data.append(row)
assert_equal(data, [['5.1', '1.4', '3.5', '0.2', 'Iris-setosa'], ['4.9', '1.4', '3.0', '0.2', 'Iris-setosa'], ['4.7', '1.3', '3.2', '0.2', 'Iris-setosa'], ['4.6', '1.5', '3.1', '0.2', 'Iris-setosa'], ['5.0', '1.4', '3.6', '0.2', 'Iris-setosa'], ['5.4', '1.7', '3.9', '0.4', 'Iris-setosa'], ['4.6', '1.4', '3.4', '0.3', 'Iris-setosa'], ['5.0', '1.5', '3.4', '0.2', 'Iris-setosa'], ['4.4', '1.4', '2.9', '0.2', 'Iris-setosa'], ['4.9', '1.5', '3.1', '0.1', 'Iris-setosa'], ['5.4', '1.5', '3.7', '0.2', 'Iris-setosa'], ['4.8', '1.6', '3.4', '0.2', 'Iris-setosa'], ['4.8', '1.4', '3.0', '0.1', 'Iris-setosa'], ['4.3', '1.1', '3.0', '0.1', 'Iris-setosa'], ['5.8', '1.2', '4.0', '0.2', 'Iris-setosa'], ['5.7', '1.5', '4.4', '0.4', 'Iris-setosa'], ['5.4', '1.3', '3.9', '0.4', 'Iris-setosa'], ['5.1', '1.4', '3.5', '0.3', 'Iris-setosa'], ['5.7', '1.7', '3.8', '0.3', 'Iris-setosa'], ['5.1', '1.5', '3.8', '0.3', 'Iris-setosa'], ['5.4', '1.7', '3.4', '0.2', 'Iris-setosa'], ['5.1', '1.5', '3.7', '0.4', 'Iris-setosa'], ['4.6', '1.0', '3.6', '0.2', 'Iris-setosa'], ['5.1', '1.7', '3.3', '0.5', 'Iris-setosa'], ['4.8', '1.9', '3.4', '0.2', 'Iris-setosa'], ['5.0', '1.6', '3.0', '0.2', 'Iris-setosa'], ['5.0', '1.6', '3.4', '0.4', 'Iris-setosa'], ['5.2', '1.5', '3.5', '0.2', 'Iris-setosa'], ['5.2', '1.4', '3.4', '0.2', 'Iris-setosa'], ['4.7', '1.6', '3.2', '0.2', 'Iris-setosa'], ['4.8', '1.6', '3.1', '0.2', 'Iris-setosa'], ['5.4', '1.5', '3.4', '0.4', 'Iris-setosa'], ['5.2', '1.5', '4.1', '0.1', 'Iris-setosa'], ['5.5', '1.4', '4.2', '0.2', 'Iris-setosa'], ['4.9', '1.5', '3.1', '0.1', 'Iris-setosa'], ['5.0', '1.2', '3.2', '0.2', 'Iris-setosa'], ['5.5', '1.3', '3.5', '0.2', 'Iris-setosa'], ['4.9', '1.5', '3.1', '0.1', 'Iris-setosa'], ['4.4', '1.3', '3.0', '0.2', 'Iris-setosa'], ['5.1', '1.5', '3.4', '0.2', 'Iris-setosa'], ['5.0', '1.3', '3.5', '0.3', 'Iris-setosa'], ['4.5', '1.3', '2.3', '0.3', 'Iris-setosa'], ['4.4', '1.3', '3.2', '0.2', 'Iris-setosa'], ['5.0', '1.6', '3.5', '0.6', 'Iris-setosa'], 
['5.1', '1.9', '3.8', '0.4', 'Iris-setosa'], ['4.8', '1.4', '3.0', '0.3', 'Iris-setosa'], ['5.1', '1.6', '3.8', '0.2', 'Iris-setosa'], ['4.6', '1.4', '3.2', '0.2', 'Iris-setosa'], ['5.3', '1.5', '3.7', '0.2', 'Iris-setosa'], ['5.0', '1.4', '3.3', '0.2', 'Iris-setosa'], ['7.0', '4.7', '3.2', '1.4', 'Iris-versicolor'], ['6.4', '4.5', '3.2', '1.5', 'Iris-versicolor'], ['6.9', '4.9', '3.1', '1.5', 'Iris-versicolor'], ['5.5', '4.0', '2.3', '1.3', 'Iris-versicolor'], ['6.5', '4.6', '2.8', '1.5', 'Iris-versicolor'], ['5.7', '4.5', '2.8', '1.3', 'Iris-versicolor'], ['6.3', '4.7', '3.3', '1.6', 'Iris-versicolor'], ['4.9', '3.3', '2.4', '1.0', 'Iris-versicolor'], ['6.6', '4.6', '2.9', '1.3', 'Iris-versicolor'], ['5.2', '3.9', '2.7', '1.4', 'Iris-versicolor'], ['5.0', '3.5', '2.0', '1.0', 'Iris-versicolor'], ['5.9', '4.2', '3.0', '1.5', 'Iris-versicolor'], ['6.0', '4.0', '2.2', '1.0', 'Iris-versicolor'], ['6.1', '4.7', '2.9', '1.4', 'Iris-versicolor'], ['5.6', '3.6', '2.9', '1.3', 'Iris-versicolor'], ['6.7', '4.4', '3.1', '1.4', 'Iris-versicolor'], ['5.6', '4.5', '3.0', '1.5', 'Iris-versicolor'], ['5.8', '4.1', '2.7', '1.0', 'Iris-versicolor'], ['6.2', '4.5', '2.2', '1.5', 'Iris-versicolor'], ['5.6', '3.9', '2.5', '1.1', 'Iris-versicolor'], ['5.9', '4.8', '3.2', '1.8', 'Iris-versicolor'], ['6.1', '4.0', '2.8', '1.3', 'Iris-versicolor'], ['6.3', '4.9', '2.5', '1.5', 'Iris-versicolor'], ['6.1', '4.7', '2.8', '1.2', 'Iris-versicolor'], ['6.4', '4.3', '2.9', '1.3', 'Iris-versicolor'], ['6.6', '4.4', '3.0', '1.4', 'Iris-versicolor'], ['6.8', '4.8', '2.8', '1.4', 'Iris-versicolor'], ['6.7', '5.0', '3.0', '1.7', 'Iris-versicolor'], ['6.0', '4.5', '2.9', '1.5', 'Iris-versicolor'], ['5.7', '3.5', '2.6', '1.0', 'Iris-versicolor'], ['5.5', '3.8', '2.4', '1.1', 'Iris-versicolor'], ['5.5', '3.7', '2.4', '1.0', 'Iris-versicolor'], ['5.8', '3.9', '2.7', '1.2', 'Iris-versicolor'], ['6.0', '5.1', '2.7', '1.6', 'Iris-versicolor'], ['5.4', '4.5', '3.0', '1.5', 'Iris-versicolor'], ['6.0', '4.5', 
'3.4', '1.6', 'Iris-versicolor'], ['6.7', '4.7', '3.1', '1.5', 'Iris-versicolor'], ['6.3', '4.4', '2.3', '1.3', 'Iris-versicolor'], ['5.6', '4.1', '3.0', '1.3', 'Iris-versicolor'], ['5.5', '4.0', '2.5', '1.3', 'Iris-versicolor'], ['5.5', '4.4', '2.6', '1.2', 'Iris-versicolor'], ['6.1', '4.6', '3.0', '1.4', 'Iris-versicolor'], ['5.8', '4.0', '2.6', '1.2', 'Iris-versicolor'], ['5.0', '3.3', '2.3', '1.0', 'Iris-versicolor'], ['5.6', '4.2', '2.7', '1.3', 'Iris-versicolor'], ['5.7', '4.2', '3.0', '1.2', 'Iris-versicolor'], ['5.7', '4.2', '2.9', '1.3', 'Iris-versicolor'], ['6.2', '4.3', '2.9', '1.3', 'Iris-versicolor'], ['5.1', '3.0', '2.5', '1.1', 'Iris-versicolor'], ['5.7', '4.1', '2.8', '1.3', 'Iris-versicolor']])
We have read the iris dataset into a pandas DataFrame for you in the cell below; this variable is called df.
Your task is to finish the function df_manipulate. Dataframe df is passed into df_manipulate. Using Dataframe df do the following:
# Load the dataset that df_manipulate will operate on.
df = pd.read_csv('./iris.csv') # Read in the iris dataset
# As the last expression in a notebook cell, this displays the preview.
df.head() # First 5 rows of iris.csv
def df_manipulate(df):
    '''
    This function will return a transposed dataframe and the sum of the
    'sepal length (in cm)' column.

    df: dataframe with data from iris dataset

    Returns (sepal_length_sum, dfT): the column total first, then the
    transposed dataframe. The input dataframe is not modified.

    Raises KeyError if df has no 'sepal length (in cm)' column.
    '''
    ### YOUR CODE HERE
    # .T returns a new transposed frame; df itself is left as-is.
    dfT = df.T
    sepal_length_sum = df['sepal length (in cm)'].sum()
    return sepal_length_sum, dfT
# Run the function on the full dataset loaded above.
sepal_length_sum, dfT = df_manipulate(df)
# The returned frame must match pandas' own transpose of df exactly.
assert_equal(dfT.equals(df.T), True)
# Floating-point total: compared to 5 decimal places rather than exactly.
assert_almost_equal(sepal_length_sum, 876.5, places=5)
© 2017: Robert J. Brunner at the University of Illinois.
This notebook is released under the Creative Commons license CC BY-NC-SA 4.0. Any reproduction, adaptation, distribution, dissemination or making available of this notebook for commercial use is not allowed unless authorized in writing by the copyright holder.