# Solution
import numpy as np
primes = np.array([2,3,5,7,11,13,17,19])
squares = np.array([i*i for i in range(1, 41)])

# Testing 
assert type(primes) == np.ndarray, "primes should be a numpy array"
assert primes.shape == (8,), "primes should be a 1D array with 8 elements"
assert np.sum(primes) == 77, "Sum of primes should be 77"
assert list(primes) == [2, 3, 5, 7, 11, 13, 17, 19]

assert type(squares) == np.ndarray, "squares should be a numpy array"
assert squares.shape == (40,), "squares should be a 1D array with 40 elements"
assert np.sum(squares) == 22140, "Sum of squares should be 22140"

# Solution
C = 5.2*np.ones((27, 44))
D = C[:4, :6].copy()    
# important: without .copy(), the matrix D would only be a `view` of C's leading submatrix

# Testing
assert type(C) == np.ndarray, "C should be a numpy array"
assert C.shape == (27, 44), "C should be a 2D array with 27 rows and 44 columns"
assert np.all(np.isclose(C, 5.2)), "C should have all entries equal to 5.2"

assert type(D) == np.ndarray, "D should be a numpy array"
assert D.shape == (4, 6), "D should be a 2D array with 4 rows and 6 columns"
assert np.all(np.isclose(D, 5.2)), "D should have all entries equal to 5.2"
assert D.flags['OWNDATA'], "D should be a deep copy, not a view, of C"

# Solution
R = np.reshape(range(0, 30), (5, 6))
S = R[::2, 2:5]
print(S)

[[ 2  3  4]
 [14 15 16]
 [26 27 28]]

# Testing
assert np.all(S == np.array([[2, 3, 4], [14, 15, 16], [26, 27, 28]])), "S is incorrect"

# Solution
def row_col_mean(A):
    return np.mean(A, axis=1), np.mean(A, axis=0)

# Testing
B = np.array([[1, 3, 4],[5, 0, 2]])
assert np.all( np.isclose(row_col_mean(B)[0], np.array([2.6666667, 2.3333333])) ), "Row means are incorrect"
assert np.all( np.isclose(row_col_mean(B)[1], np.array([3, 1.5, 3])) ), "Column means are incorrect"
testA = np.array([[1,3,-4,-9], [5,0,2,2], [3,5,1,0]])
t0, t1 = row_col_mean(testA)
assert np.isclose(t0, np.array([-2.25, 2.25, 2.25])).all()
assert np.isclose(t1, np.array([ 3., 2.66666667, -0.33333333, -2.33333333])).all()

# Solution
from time import time

lst = [-1, 1]*(10**7//2)
st = time()   # start time
s1 = sum(lst)
t1 = time() - st
print("Python list sum took", t1, "seconds")

nplst = np.array(lst)
st = time()   # start time
s2 = nplst.sum()
t2 = time() - st
print("Numpy array sum took", t2, "seconds")

print("Numpy sum is", t1/t2, "times faster")

Python list sum took 0.0606694221496582 seconds
Numpy array sum took 0.011102437973022461 seconds
Numpy sum is 5.4645134966821995 times faster

# Solution
from IPython.display import display
import numpy as np
import pandas as pd
weather = pd.read_csv("_datasets/mcr_airport_weather.csv")

print("part 1:")
display(weather.head(7))

print("part 2:")
display(weather[['snow','tsun']].isna().all())

# might as well get rid of snow and tsun
weather.drop(columns=['snow','tsun'], inplace=True)

print("part 3:")
weather_june = weather[(weather["date"] >= '2024-06-01') & (weather["date"] < '2024-07-01')]
prcp_june = weather_june['prcp'].sum()
print("prcp_june =", prcp_june)

# alternatively, we could do (but requires knowledge of # days in June):
weather2 = weather.set_index("date")
weather_june = weather2.loc['2024-06-01':'2024-06-30']
prcp_june = weather_june['prcp'].sum()

weather_sept = weather[(weather["date"] >= '2024-09-01') & (weather["date"] < '2024-10-01')]
range_sept = weather_sept['tmax'].max() - weather_sept['tmin'].min()
print("range_sept =", range_sept)

tmax = weather['tmax'].max()
hottest = weather[np.isclose(weather["tmax"], tmax)]
print("hottest =", hottest)

part 1:

part 2:

snow    True
tsun    True
dtype: bool

part 3:
prcp_june = 48.199999999999996
range_sept = 23.0
hottest =            date  tavg  tmin  tmax  prcp  wdir  wspd  wpgt    pres
200  2024-07-19  21.3  16.0  27.0   0.0   188   8.0  31.5  1014.9
212  2024-07-31  20.0  12.0  27.0   0.1   229   4.5  25.9  1015.6

# Testing
assert np.isclose(prcp_june, 48.2), "prcp_june incorrect"
assert np.isclose(range_sept, 23.0), "range_sept incorrect"
assert type(hottest) == pd.DataFrame, "hottest should be a dataframe"
assert hottest.shape[0] == 2, "hottest should have two rows"

# Solution
from IPython.display import display
ratings = pd.read_csv("_datasets/corporate_rating.csv")
print("part 1:")
display(ratings.head())

print("part 2:")
ratings["Rating_number"] = ratings["Rating"].replace( 
    ['AAA', 'AA', 'A', 'BBB', 'BB', 'B', 'CCC', 'CC', 'C', 'D'],
    range(1, 11)
)
display(ratings[["Name", "Rating", "Rating_number"]].head(8))

print("part 3:")
print("There are", len(ratings["Name"].unique()), "unique company names.")

part 1:

part 2:

part 3:
There are 593 unique company names.

# Testing
assert len(ratings.columns) == 32, "There should be 32 columns"
assert pd.api.types.is_integer_dtype(ratings["Rating_number"]), "Rating_number should be an integer"
assert ratings["Rating_number"].min() == 1, "The minimum rating number should be 1"
assert ratings["Rating_number"].max() == 10, "The maximum rating number should be 10"
assert ratings["Rating_number"].sum() == 8841, "The sum of rating numbers should be 8841"

# Solution
rail = pd.read_csv("_datasets/rail_park_and_ride_spaces.csv", thousands=',')

# keep all rows with non-NaN `Railway Station`, set index
stat = rail['Railway Station'].notna()
rail = rail[stat].set_index('Railway Station')

# missing entries in column P&R Spaces
missing = rail[rail['P&R Spaces'].isna()]

# clean rail
rail_clean = rail[rail['P&R Spaces'].notna()]

# total number of spaces
total = rail_clean['P&R Spaces'].sum()

# now metro, remove question marks, set index
metro = pd.read_csv("_datasets/Metrolink_Park_and_Ride_Facilities.csv")
metro['Stop name'] = metro['Stop name'].str.replace('?', '', regex=False)
metro = metro.set_index('Stop name')

# make `Total parking` numerical or NaN
metro['Total parking'] = metro['Total parking'].str.extract('(\d+)')

# clean metro
metro_clean = metro[metro['Total parking'].notna()]
metro_clean

# merge
inner = pd.merge(rail_clean, metro_clean, left_index=True, right_index=True, how='inner')
inner

	date	tavg	tmin	tmax	prcp	snow	wdir	wspd	wpgt	pres	tsun
0	2024-01-01	6.7	5.0	9.0	14.3	NaN	200	18.3	38.9	994.0	NaN
1	2024-01-02	8.5	7.0	10.0	20.5	NaN	197	10.9	27.8	981.9	NaN
2	2024-01-03	7.1	6.0	8.0	3.0	NaN	217	8.3	20.4	988.2	NaN
3	2024-01-04	5.5	2.0	8.0	0.5	NaN	171	6.7	22.2	998.4	NaN
4	2024-01-05	4.9	2.0	7.0	0.0	NaN	293	5.6	14.8	1003.5	NaN
5	2024-01-06	2.1	-2.0	6.0	0.0	NaN	301	4.8	11.1	1018.2	NaN
6	2024-01-07	2.4	-1.0	5.0	0.0	NaN	11	4.5	13.0	1030.0	NaN

	Rating	Name	Symbol	Rating Agency Name	Date	Sector	currentRatio	quickRatio	cashRatio	daysOfSalesOutstanding	...	effectiveTaxRate	freeCashFlowOperatingCashFlowRatio	freeCashFlowPerShare	cashPerShare	companyEquityMultiplier	ebitPerRevenue	enterpriseValueMultiple	operatingCashFlowPerShare	operatingCashFlowSalesRatio	payablesTurnover
0	A	Whirlpool Corporation	WHR	Egan-Jones Ratings Company	11/27/2015	Consumer Durables	0.945894	0.426395	0.099690	44.203245	...	0.202716	0.437551	6.810673	9.809403	4.008012	0.049351	7.057088	15.565438	0.058638	3.906655
1	BBB	Whirlpool Corporation	WHR	Egan-Jones Ratings Company	2/13/2014	Consumer Durables	1.033559	0.498234	0.203120	38.991156	...	0.074155	0.541997	8.625473	17.402270	3.156783	0.048857	6.460618	15.914250	0.067239	4.002846
2	BBB	Whirlpool Corporation	WHR	Fitch Ratings	3/6/2015	Consumer Durables	0.963703	0.451505	0.122099	50.841385	...	0.214529	0.513185	9.693487	13.103448	4.094575	0.044334	10.491970	18.888889	0.074426	3.483510
3	BBB	Whirlpool Corporation	WHR	Fitch Ratings	6/15/2012	Consumer Durables	1.019851	0.510402	0.176116	41.161738	...	1.816667	-0.147170	-1.015625	14.440104	3.630950	-0.012858	4.080741	6.901042	0.028394	4.581150
4	BBB	Whirlpool Corporation	WHR	Standard & Poor's Ratings Services	10/24/2016	Consumer Durables	0.957844	0.495432	0.141608	47.761126	...	0.166966	0.451372	7.135348	14.257556	4.012780	0.053770	8.293505	15.808147	0.058065	3.857790

	In/Out M60	P&R Spaces	Lift	Total parking	Blue Badge Bays	Cycling stands	Cycle lockers	Car park height restriction?
East Didsbury	I	15.0	Yes?	302	18?	10 stands?	5 lockers?	2.4m?
Milnrow	O	15.0	?	19	3? spaces	5? stands	5�lockers	?
Navigation Road	NaN	81.0	NaN	81	6�spaces	5 stands	5 lockers	?

Exercise 2.1¶

Exercise 2.2¶

Exercise 2.3¶

Exercise 2.4¶

Exercise 2.5¶

Exercise 2.6¶

Exercise 2.7¶

Exercise 2.8¶

Exercise 2.9¶

Exercise 2.10¶

Exercise 2.11¶