Group_8_NewsRecommendation.html - In [84]: import pandas as pd In [85]: data = pd.read_csv("/Users/home/Downloads/News_df_11000.csv", encoding='latin-1') In [86]:

Group_8_NewsRecommendation.html - In [84]: import pandas as pd...

This preview shows page 1 - 4 out of 34 pages.

In [84]: import pandas as pd In [85]: data = pd.read_csv("/Users/home/Downloads/News_df_11000.csv", encoding='latin-1') In [86]: data.head(2) Out[86]: DocID highlights story 0 0 ['Syrian official: Obama climbed to the top of... It's official: U.S. President Barack Obama wan... 1 1 ['NEW: A Canadian doctor says she was part of ... Los Angeles (CNN) -- A medical doctor in Vanco... In [87]: data['story'] = data['story'].apply(lambda x: " ".join(x.lower() for x in x.split())) data['story'].head(2) Out[87]: 0 it's official: u.s. president barack obama wan... 1 los angeles (cnn) -- a medical doctor in vanco... Name: story, dtype: object In [88]: data['highlights'] = data['highlights'].apply(lambda x: " ".join(x.lower() for x in x.split())) data['highlights'].head(2) Out[88]: 0 ['syrian official: obama climbed to the top of... 1 ['new: a canadian doctor says she was part of ... Name: highlights, dtype: object In [89]: data['highlights'] = data['highlights'].str.replace('[^\w\s]','') data['highlights'].head(2) Out[89]: 0 syrian official obama climbed to the top of th... 1 new a canadian doctor says she was part of a t... Name: highlights, dtype: object In [90]:
data['story'] = data['story'].str.replace('[^\w\s]','') data['story'].head(2) Out[90]: 0 its official us president barack obama wants l... 1 los angeles cnn a medical doctor in vancouver... Name: story, dtype: object In [91]: data.head() Out[91]: DocID highlights story 0 0 syrian official obama climbed to the top of th... its official us president barack obama wants l... 1 1 new a canadian doctor says she was part of a t... los angeles cnn a medical doctor in vancouver... 2 2 the 15 new cardinals will be installed on febr... cnnfor the second time during his papacy pope ... 3 3 new bermudan premier above all this was a huma... hamilton bermuda cnn four chinese nationals o... 4 4 a 4yearold boy is the latest victim of a manea... kathmandu nepal cnn a ferocious leopard may h... In [92]: import graphlab as gl In [93]: news_data = gl.SFrame(data) In [94]: news_data.head() Out[94]: DocID highlights story 0 syrian official obama climbed to the top of ... its official us president barack obama wants ... 1 new a canadian doctor says she was part of a ... los angeles cnn a medical doctor in ... 2 the 15 new cardinals will be installed on february ... cnnfor the second time during his papacy pope ... 3 new bermudan premier above all this was a ... hamilton bermuda cnn four chinese national ... 4 a 4yearold boy is the kathmandu nepal cnn a
latest victim of a ... ferocious leopard may ... 5 new kyle white without this team there would be ... cnn kyle white now has two pieces of metal to ... 6 studies show that an essential nutrient in ... healthcom an essential nutrient found in fish ... 7 judge orders all three men to be held in police ... ballet dancer pavel dmitrichenko often cast ... 8 ken klukowski cases heard by supreme court could ... this week the supreme court heard two historic ... 9 liberia is one of the countries worsthit by ... zango town liberia cnn at the gravesite in a ...

  • Left Quote Icon

    Student Picture

  • Left Quote Icon

    Student Picture

  • Left Quote Icon

    Student Picture