# In [1]:
# File 'student-mat.csv' contains a list of students and their alcohol
# consumption habits along with some demographic data.
# Answer the following questions -

# In [2]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from scipy.stats import norm
from scipy.stats import ttest_1samp
from scipy.stats import ttest_ind
from statsmodels.stats import weightstats as stests

# In [3]:
# Load the file "student-mat.csv" (the original comment named
# "Movie-Ratings.csv", which is not the file read here).
student_mat = pd.read_csv("student-mat.csv")
display(student_mat)
# Output (as rendered; column headers reconstructed from a garbled extraction,
# exact spelling/spacing of the CSV headers to be confirmed):
#      sex  age  Mjob      Fjob      guardian  Workday consumption  Weekend consumption  absences from school
# 0    F    18   at_home   teacher   mother    1                    1                    6
# 1    F    17   at_home   other     father    1                    1                    4
# 2    F    15   at_home   other     mother    2                    3                    10
# 3    F    15   health    services  mother    1                    1                    2
# 4    F    16   other     other     father    1                    2                    4
# ...
# 390  M    20   services  services  other     4                    5                    11
# 391  M    17   services  services  mother    3                    4                    3
# 392  M    21   other     other     other     3                    3                    3
# 393  M    18   services  other     mother    3                    4                    0
# 394  M    19   other     at_home   father    3                    3                    5
# 395 rows x 8 columns

# In [4]:
# 1. Is there a difference between mean workday alcohol consumption for
#    students who reported their mother as being at home compared to those
#    whose mothers are working? Look at column Mjob. (10 points)

# NOTE(review): the extracted source shows this key both as
# 'Workday consumption' and 'Workdayconsumption' within one expression; the
# latter looks like a line-wrap artefact. Confirm against the CSV header.
CONSUMPTION_COL = 'Workday consumption'

# Home: mother reported as 'at_home'.
Home = student_mat.loc[student_mat.Mjob == 'at_home'].reset_index(drop=True)

# Work: every other Mjob value, relabelled to the single category 'at_work'.
# .assign returns a new frame, so nothing is mutated after the filter.
Work = (
    student_mat.loc[student_mat.Mjob != 'at_home']
    .reset_index(drop=True)
    .assign(Mjob='at_work')
)

compare_data = pd.concat([Home, Work]).reset_index(drop=True)
display(compare_data, len(Home), len(Work), len(compare_data))
# Output (as rendered):
#      sex  age  Mjob     Fjob      guardian  Workday consumption  Weekend consumption  absences from school
# 0    F    18   at_home  teacher   mother    1                    1                    6
# 1    F    17   at_home  other     father    1                    1                    4
# 2    F    15   at_home  other     mother    2                    3                    10
# 3    F    15   at_home  other     mother    1                    1                    8
# 4    F    16   at_home  other     mother    1                    2                    12
# ...
# 390  M    20   at_work  services  other     4                    5                    11
# 391  M    17   at_work  services  mother    3                    4                    3
# 392  M    21   at_work  other     other     3                    3                    3
# 393  M    18   at_work  other     mother    3                    4                    0
# 394  M    19   at_work  at_home   father    3                    3                    5
# 395 rows x 8 columns
# 59
# 336
# 395

# In [5]:
# Remove outliers outside of 6-sigma and re-plot the data.

# Half-width of the screening window, in sample standard deviations.
N_SIGMA = 6


def sigma_fences(values, n_sigma=N_SIGMA):
    """Return (mean, std, low_fence, high_fence) for a numeric Series.

    The standard deviation is the sample estimate (ddof=1) and the fences
    are mean -/+ n_sigma * std.
    """
    mean = values.mean()
    std = values.std(ddof=1)
    return mean, std, mean - n_sigma * std, mean + n_sigma * std


def keep_within(frame, column, low, high):
    """Return the rows of `frame` whose `column` lies in [low, high].

    Bounds are inclusive so that a group with zero spread (std == 0, both
    fences equal to the mean) keeps its rows; the original strict
    comparisons would have discarded every row in that case.
    """
    return frame.loc[frame[column].between(low, high)]


# Home data: screen anything outside +/- 6 std dev.
meanH, stdH, fence_low_stdH, fence_high_stdH = sigma_fences(Home[CONSUMPTION_COL])
Home = keep_within(Home, CONSUMPTION_COL, fence_low_stdH, fence_high_stdH)

# Work data: screen anything outside +/- 6 std dev.
meanW, stdW, fence_low_stdW, fence_high_stdW = sigma_fences(Work[CONSUMPTION_COL])
Work = keep_within(Work, CONSUMPTION_COL, fence_low_stdW, fence_high_stdW)

compare_data = pd.concat([Home, Work]).reset_index(drop=True)
display(compare_data, len(Home), len(Work), len(compare_data))
# Output (as rendered; truncated in the available preview):
#      sex  age  Mjob     Fjob      guardian  Workday consumption  Weekend consumption  absences from school
# 0    F    18   at_home  teacher   mother    1                    1                    6
# 1    F    17   at_home  other     father    1                    1                    4
# 2    F    15   at_home  other     mother    2                    3                    10
# 3    F    15   at_home  other     mother    1                    1                    8
# 4    F    16   at_home  other     mother    1                    2                    12
# ...
Upload your study docs or become a
Course Hero member to access this document
Upload your study docs or become a
Course Hero member to access this document
End of preview. Want to read all 14 pages?
Upload your study docs or become a
Course Hero member to access this document
Term
Fall
Professor
NoProfessor
Tags