Course Hero Logo

open(get_path("locales.json"), "wt") as fp: dump(locales, fp)

Course Hero uses AI to attempt to automatically extract content from documents to surface to you and others so you can study better, e.g., in search results, to enrich docs, and more. This preview shows page 13 - 16 out of 59 pages.

# NOTE(review): this span was recovered from a whitespace-stripped page
# extraction. The statements immediately below are the tail of a definition
# that begins before the visible text; their enclosing header and indentation
# are not visible, so they are preserved as comments rather than guessed at.
#
#   open(get_path("locales.json"), "wt") as fp:
#       dump(locales, fp, indent=2, sort_keys=True)
#   print("==> Done generating locales file.")

ex0_gen_locales()
ex0_gen_soln(force=False)
### END HIDDEN TESTS


def ex0_random_value():
    """Return one randomly chosen filler value for a dummy column.

    The candidates are a random integer in [-100, 100], a random string
    (``ex0_random_string`` called with a random argument in [1, 10] --
    presumably the string length; confirm against ``problem_utils``), a
    random date, an empty string, and ``nan``.
    """
    from random import randint, choice
    from numpy import nan
    from problem_utils import ex0_random_date, ex0_random_string

    options = [
        randint(-100, 100),  # int
        ex0_random_string(randint(1, 10)),  # string
        ex0_random_date(),  # date
        '',  # implicit NaN
        nan,  # explicit NaN
    ]
    return choice(options)


def ex0_get_locales(filename=get_path("locales.json")):
    """Load and return the JSON-decoded contents of *filename*.

    ``ex0_gen_row`` indexes the result by country and picks from the value,
    so the file is expected to hold a mapping of country -> provinces.
    """
    from json import load

    with open(filename, "rt") as fp:
        return load(fp)


def ex0_gen_row(locales, num_dummies=0):
    """Generate one random row as a tuple.

    The tuple is ``(country, province, confirmed, last_updated, *dummies)``:

    * ``country`` is a random key of *locales*;
    * ``province`` is ``nan`` about 10% of the time, otherwise a random
      entry of ``locales[country]``;
    * ``confirmed`` is ``0`` about 10% of the time, otherwise a random
      integer in [1, 100000];
    * ``last_updated`` comes from ``ex0_random_date()``;
    * *num_dummies* extra values come from ``ex0_random_value()``.
    """
    from random import choice, random, randint
    from numpy import nan
    from problem_utils import ex0_random_date

    # The order of the random draws is kept as in the original so that a
    # seeded generator yields the same rows.
    country = choice(list(locales))
    province = nan if random() <= 0.1 else choice(locales[country])
    confirmed = 0 if random() <= 0.1 else randint(1, 100000)
    last_updated = ex0_random_date()
    dummy_vals = tuple(ex0_random_value() for _ in range(num_dummies))
    # [page header in extracted text: 11/1/2020 main2/pmt2-sample-solutions (1)/midterm2/problem18/problem18.html 14/59]
    return (country, province, confirmed, last_updated, *dummy_vals)


def ex0_gen_df():
    """Build a random test case as a pair ``(df_in, df_soln)``.

    ``df_soln`` holds the de-duplicated rows restricted to the four real
    columns, with ``"Last Update"`` renamed to ``"Timestamp"``.  ``df_in``
    holds the same rows plus 1-4 dummy columns, with randomly varied column
    spellings, zero counts replaced by empty strings, and some rows
    repeated.
    """
    from random import randint, random
    from pandas import DataFrame
    from problem_utils import ex0_random_string

    locales = ex0_get_locales()

    # Generate random columns, which the student should ignore
    num_dummy_cols = randint(1, 4)
    dummy_cols = []
    # Retry until the set comprehension yields the requested number of
    # distinct names (duplicates collapse inside the set).
    while len(dummy_cols) != num_dummy_cols:
        dummy_cols = list({ex0_random_string(5) for _ in range(num_dummy_cols)})

    # Generate a bunch of random rows
    num_trials = randint(10, 50)
    rows = [ex0_gen_row(locales, num_dummy_cols) for _ in range(num_trials)]

    # Remove any initial duplicates. Rows are compared through repr() --
    # presumably because they may contain nan, which never compares equal
    # to itself.
    rows = sorted(rows, key=repr)
    rows_soln = [rows[0]]
    for r in rows[1:]:
        if repr(r) != repr(rows_soln[-1]):
            rows_soln.append(r)

    # Construct the solution tibble
    cols_in = [
        "Country/Region" if random() < 0.75 else "Country_Region",
        "Province/State" if random() < 0.75 else "Province_State",
        "Confirmed",
        "Last Update" if random() < 0.75 else "Last_Update",
    ]
    cols_out = ["Country/Region", "Province/State", "Confirmed", "Last Update"]
    df_soln = DataFrame(rows_soln, columns=cols_out + dummy_cols)[cols_out] \
        .rename(columns={"Last Update": "Timestamp"})

    # Generate a corresponding input tibble
    rows_in = []
    for r in rows_soln:
        s = list(r)
        if s[2] == 0:
            s[2] = ''  # NaN counts
        r_in = tuple(s)
        rows_in.append(r_in)
        if random() <= 0.15:  # Random duplicates
            for _ in range(randint(1, 4)):
                rows_in.append(r_in)
    df_in = DataFrame(rows_in, columns=cols_in + dummy_cols)
    return df_in, df_soln


def ex0_split_df(df, max_splits=5):
    # NOTE(review): the extracted text ends partway through this function.
    # Only the visible statements are reproduced; the remainder (starting at
    # the dangling "for i, j in") is missing and has NOT been reconstructed.
    from random import randint
    from numpy import arange, sort, append
    from numpy.random import shuffle, choice

    # Shuffle the rows
    # [page header in extracted text: 11/1/2020 main2/pmt2-sample-solutions (1)/midterm2/problem18/problem18.html 15/59]
    df = df.sample(frac=1).reset_index(drop=True)

    # Split the rows
    df_split = []
    num_splits = min(randint(0, max_splits), len(df))
    if num_splits > 0:
        split_inds = sort(choice(arange(len(df)), size=num_splits, replace=False))
        if split_inds[0] > 0:
            split_inds = append(0, split_inds)
        if split_inds[-1] < len(df):
            split_inds = append(split_inds, len(df))
        # for i, j in   <-- source truncated here

Upload your study docs or become a

Course Hero member to access this document

Upload your study docs or become a

Course Hero member to access this document

End of preview. Want to read all 59 pages?

Upload your study docs or become a

Course Hero member to access this document

Term
Fall
Professor
DaKuang
Tags
Extract transform load

Newly uploaded documents

Show More

Newly uploaded documents

Show More

  • Left Quote Icon

    Student Picture

  • Left Quote Icon

    Student Picture

  • Left Quote Icon

    Student Picture