Data Visualization with Pandas

  • Downloading online data from jupyter
  • Handling the datetime in pandas
  • Data visualization using COVID-19 data
  • subplots using matplotlib
  • curve-fitting and modeling of COVID-19 data.
!ls

#!pwd
class-wk4.ipynb       wk4-class_notes.ipynb
!wget https://covid.ourworldindata.org/data/ecdc/total_cases.csv
!wget https://covid.ourworldindata.org/data/ecdc/total_deaths.csv
--2020-05-09 10:38:54--  https://covid.ourworldindata.org/data/ecdc/total_cases.csv
Resolving covid.ourworldindata.org (covid.ourworldindata.org)... 104.248.50.87
Connecting to covid.ourworldindata.org (covid.ourworldindata.org)|104.248.50.87|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 70071 (68K) [text/csv]
Saving to: ‘total_cases.csv’

total_cases.csv     100%[===================>]  68.43K  --.-KB/s    in 0.05s

2020-05-09 10:38:54 (1.43 MB/s) - ‘total_cases.csv’ saved [70071/70071]

--2020-05-09 10:38:54--  https://covid.ourworldindata.org/data/ecdc/total_deaths.csv
Resolving covid.ourworldindata.org (covid.ourworldindata.org)... 104.248.50.87
Connecting to covid.ourworldindata.org (covid.ourworldindata.org)|104.248.50.87|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 54625 (53K) [text/csv]
Saving to: ‘total_deaths.csv’

total_deaths.csv    100%[===================>]  53.34K  --.-KB/s    in 0.03s

2020-05-09 10:38:54 (1.57 MB/s) - ‘total_deaths.csv’ saved [54625/54625]
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df = pd.read_csv('total_deaths.csv')
df.head(3)
date World Afghanistan Albania Algeria Andorra Angola Anguilla Antigua and Barbuda Argentina ... United States Virgin Islands Uruguay Uzbekistan Vatican Venezuela Vietnam Western Sahara Yemen Zambia Zimbabwe
0 2019-12-31 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
1 2020-01-01 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2 2020-01-02 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN

3 rows × 211 columns

df.tail(2)
date World Afghanistan Albania Algeria Andorra Angola Anguilla Antigua and Barbuda Argentina ... United States Virgin Islands Uruguay Uzbekistan Vatican Venezuela Vietnam Western Sahara Yemen Zambia Zimbabwe
129 2020-05-08 269249 106.0 31.0 483.0 47.0 2.0 0.0 3.0 282.0 ... 4.0 17.0 10.0 0.0 10.0 0.0 0.0 6.0 4.0 4.0
130 2020-05-09 274290 109.0 31.0 488.0 47.0 2.0 0.0 3.0 293.0 ... 4.0 18.0 10.0 0.0 10.0 0.0 0.0 7.0 4.0 4.0

2 rows × 211 columns

df.columns
Index(['date', 'World', 'Afghanistan', 'Albania', 'Algeria', 'Andorra',
       'Angola', 'Anguilla', 'Antigua and Barbuda', 'Argentina',
       ...
       'United States Virgin Islands', 'Uruguay', 'Uzbekistan', 'Vatican',
       'Venezuela', 'Vietnam', 'Western Sahara', 'Yemen', 'Zambia',
       'Zimbabwe'],
      dtype='object', length=211)
df.dtypes
date               object
World               int64
Afghanistan       float64
Albania           float64
Algeria           float64
                   ...   
Vietnam           float64
Western Sahara    float64
Yemen             float64
Zambia            float64
Zimbabwe          float64
Length: 211, dtype: object
# Re-read the deaths table, this time parsing the 'date' column as datetimes
# and using it as the row index.
df = pd.read_csv(
    'total_deaths.csv',
    index_col=['date'],
    parse_dates=['date'],
)
df.head(3)
World Afghanistan Albania Algeria Andorra Angola Anguilla Antigua and Barbuda Argentina Armenia ... United States Virgin Islands Uruguay Uzbekistan Vatican Venezuela Vietnam Western Sahara Yemen Zambia Zimbabwe
date
2019-12-31 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2020-01-01 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2020-01-02 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN

3 rows × 210 columns

df.index
DatetimeIndex(['2019-12-31', '2020-01-01', '2020-01-02', '2020-01-03',
               '2020-01-04', '2020-01-05', '2020-01-06', '2020-01-07',
               '2020-01-08', '2020-01-09',
               ...
               '2020-04-30', '2020-05-01', '2020-05-02', '2020-05-03',
               '2020-05-04', '2020-05-05', '2020-05-06', '2020-05-07',
               '2020-05-08', '2020-05-09'],
              dtype='datetime64[ns]', name='date', length=131, freq=None)
# Data from a fixed date period. Label slicing on the date index includes
# both end dates. The slice is bound to a name so that `df_jan` exists when
# it is displayed on the next line.
df_jan = df.loc['2020-01-01':'2020-01-11']
df_jan
World Afghanistan Albania Algeria Andorra Angola Anguilla Antigua and Barbuda Argentina Armenia ... United States Virgin Islands Uruguay Uzbekistan Vatican Venezuela Vietnam Western Sahara Yemen Zambia Zimbabwe
date
2020-01-01 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2020-01-02 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2020-01-03 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2020-01-04 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2020-01-05 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2020-01-06 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2020-01-07 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2020-01-08 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2020-01-09 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2020-01-10 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2020-01-11 1 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN

11 rows × 210 columns

df.index
DatetimeIndex(['2019-12-31', '2020-01-01', '2020-01-02', '2020-01-03',
               '2020-01-04', '2020-01-05', '2020-01-06', '2020-01-07',
               '2020-01-08', '2020-01-09',
               ...
               '2020-04-30', '2020-05-01', '2020-05-02', '2020-05-03',
               '2020-05-04', '2020-05-05', '2020-05-06', '2020-05-07',
               '2020-05-08', '2020-05-09'],
              dtype='datetime64[ns]', name='date', length=131, freq=None)
df.index.strftime('%m-%d')
Index(['12-31', '01-01', '01-02', '01-03', '01-04', '01-05', '01-06', '01-07',
       '01-08', '01-09',
       ...
       '04-30', '05-01', '05-02', '05-03', '05-04', '05-05', '05-06', '05-07',
       '05-08', '05-09'],
      dtype='object', length=131)

Data preprocessing

# Load total deaths (dfd) and total cases (dfc), each with the parsed 'date'
# column as its row index.
dfd = pd.read_csv(
    'total_deaths.csv',
    index_col=['date'],
    parse_dates=['date'],
)
dfc = pd.read_csv(
    'total_cases.csv',
    index_col=['date'],
    parse_dates=['date'],
)
dfd.head()
World Afghanistan Albania Algeria Andorra Angola Anguilla Antigua and Barbuda Argentina Armenia ... United States Virgin Islands Uruguay Uzbekistan Vatican Venezuela Vietnam Western Sahara Yemen Zambia Zimbabwe
date
2019-12-31 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2020-01-01 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2020-01-02 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2020-01-03 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2020-01-04 0 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN

5 rows × 210 columns

dfc.head()
World Afghanistan Albania Algeria Andorra Angola Anguilla Antigua and Barbuda Argentina Armenia ... United States Virgin Islands Uruguay Uzbekistan Vatican Venezuela Vietnam Western Sahara Yemen Zambia Zimbabwe
date
2019-12-31 27 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2020-01-01 27 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2020-01-02 27 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2020-01-03 44 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN
2020-01-04 44 0.0 NaN 0.0 NaN NaN NaN NaN NaN 0.0 ... NaN NaN NaN NaN NaN 0.0 NaN NaN NaN NaN

5 rows × 210 columns

# check for NaN in the entire dataframe
dfd.isnull().values.any()
True
# count the NaN under each columns
dfd.isnull().sum()
World               0
Afghanistan        10
Albania            69
Algeria             5
Andorra            74
                 ... 
Vietnam             4
Western Sahara    117
Yemen             101
Zambia             79
Zimbabwe           81
Length: 210, dtype: int64
# Count total NaN in the dataframe
dfd.isnull().sum().sum()
11473
# Replace the NaN values.
# The columns are running totals, so a gap is first filled with the last
# reported total (forward fill); only gaps before a country's first report
# remain and those become 0. Filling every NaN with 0 would make a total drop
# to zero mid-series, which shows up as a dip in the plots and as a spurious
# negative/positive pair in any later .diff().
dfd = dfd.ffill().fillna(0)
dfc = dfc.ffill().fillna(0)
dfd.head(2)
World Afghanistan Albania Algeria Andorra Angola Anguilla Antigua and Barbuda Argentina Armenia ... United States Virgin Islands Uruguay Uzbekistan Vatican Venezuela Vietnam Western Sahara Yemen Zambia Zimbabwe
date
2019-12-31 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2020-01-01 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

2 rows × 210 columns

dfc.head()
World Afghanistan Albania Algeria Andorra Angola Anguilla Antigua and Barbuda Argentina Armenia ... United States Virgin Islands Uruguay Uzbekistan Vatican Venezuela Vietnam Western Sahara Yemen Zambia Zimbabwe
date
2019-12-31 27 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2020-01-01 27 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2020-01-02 27 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2020-01-03 44 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2020-01-04 44 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

5 rows × 210 columns

Data visualization

# taking out data of a column
# two methods
dfd.Afghanistan
date
2019-12-31      0.0
2020-01-01      0.0
2020-01-02      0.0
2020-01-03      0.0
2020-01-04      0.0
              ...  
2020-05-05     90.0
2020-05-06     95.0
2020-05-07    104.0
2020-05-08    106.0
2020-05-09    109.0
Name: Afghanistan, Length: 131, dtype: float64
#dfd.United States # doesn't work
#method 2
dfd['United States']
date
2019-12-31        0
2020-01-01        0
2020-01-02        0
2020-01-03        0
2020-01-04        0
              ...  
2020-05-05    68934
2020-05-06    71078
2020-05-07    73431
2020-05-08    75670
2020-05-09    77180
Name: United States, Length: 131, dtype: int64
dfd.columns
Index(['World', 'Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Anguilla', 'Antigua and Barbuda', 'Argentina', 'Armenia',
       ...
       'United States Virgin Islands', 'Uruguay', 'Uzbekistan', 'Vatican',
       'Venezuela', 'Vietnam', 'Western Sahara', 'Yemen', 'Zambia',
       'Zimbabwe'],
      dtype='object', length=210)
# plot Chinese data
%matplotlib inline

plt.plot(dfd.index, dfd.China)
[<matplotlib.lines.Line2D at 0x11525d490>]

png

plt.plot(dfd.index, dfd.India)
[<matplotlib.lines.Line2D at 0x112e05cd0>]

png

plt.plot(dfd.index, dfd['United States'])
[<matplotlib.lines.Line2D at 0x115393fd0>]

png

import seaborn as sns
sns.set_style('whitegrid')
plt.plot(dfd.index, dfd['United States'])
[<matplotlib.lines.Line2D at 0x11d14f110>]

png

plt.plot(dfd.index, dfd['United States'])
plt.xticks(rotation=45);
(array([737425., 737446., 737456., 737477., 737485., 737506., 737516.,
        737537., 737546.]), <a list of 9 Text xticklabel objects>)

png

# Cases for Nepal. The x-axis is taken from the same frame as the y-values,
# so the dates always line up with the counts being plotted.
plt.plot(dfc.index, dfc['Nepal'])
plt.xticks(rotation=45);

png

plt.figure(figsize=(15,5))

# plt.subplot(1, 3, k) selects panel k of a 1-row, 3-column grid.
# Every panel plots case counts against the case frame's own date index, so
# x and y always come from the same table.
plt.subplot(1, 3, 1)
plt.plot(dfc.index, dfc['Nepal'])
plt.title('Nepal')
plt.xticks(rotation=45);

plt.subplot(1, 3, 2)
plt.plot(dfc.index, dfc['India'])
plt.title('India')
plt.xticks(rotation=45);

plt.subplot(1, 3, 3)
plt.plot(dfc.index, dfc['Pakistan'])
plt.title('Pakistan')
plt.xticks(rotation=45);

png

countries = ['Nepal', 'India', 'Pakistan']

plt.figure(figsize=(15,5))

# enumerate(..., start=1) hands out the 1-based subplot position together with
# the country name, so no manual indexing into the list is needed.
for position, country in enumerate(countries, start=1):
    plt.subplot(1, 3, position)

    deaths = dfd[country]
    cases = dfc[country]
    plt.plot(deaths.index, deaths, label='Deaths')
    plt.plot(cases.index, cases, label='Cases')

    plt.title(country, fontsize=16, color='maroon')
    plt.legend(fontsize=12, frameon=False)
    plt.xticks(rotation=45)

png

countries = ['United States', 'Italy', 'France', 'United Kingdom', 'China', 'Germany']

plt.figure(figsize=(15,10))

# Two rows of panels; the column count follows from the number of countries.
n_rows = 2
n_cols = len(countries) // n_rows

for position, country in enumerate(countries, start=1):
    plt.subplot(n_rows, n_cols, position)

    # The 'o' format draws individual markers rather than a connected line.
    for frame, series_label in ((dfd, 'Deaths'), (dfc, 'Cases')):
        plt.plot(frame.index, frame[country], 'o', label=series_label)

    plt.title(country, fontsize=16, color='maroon')
    plt.legend(fontsize=12, frameon=False)
    plt.xticks(rotation=45)

png

# First date on which the US case total reached each threshold, looked up one
# threshold at a time: mask the series, then take the first surviving date.
us_cases = dfc['United States']

N = 1000
print ( us_cases[us_cases >= N].index[0] )

N = 10000
print ( us_cases[us_cases >= N].index[0] )

N = 100000
print ( us_cases[us_cases >= N].index[0] )

N = 500000
print ( us_cases[us_cases >= N].index[0] )
2020-03-11 00:00:00
2020-03-20 00:00:00
2020-03-28 00:00:00
2020-04-11 00:00:00
milestones = [1000, 10000, 100000, 500000, 1000000]

# Same lookup as a loop: for each milestone print the first date on which the
# US case total was at or above it.
us_cases = dfc['United States']
for n in milestones:
    first_date = us_cases[us_cases >= n].index[0]
    print ( first_date )
2020-03-11 00:00:00
2020-03-20 00:00:00
2020-03-28 00:00:00
2020-04-11 00:00:00
2020-04-29 00:00:00
milestones = [1000, 10000, 100000, 500000, 1000000]

# Milestone size on the x-axis, date it was first reached on the y-axis.
# One scatter call per point, exactly one point per milestone.
us_cases = dfc['United States']
for n in milestones:
    first_date = us_cases[us_cases >= n].index[0]
    plt.scatter(n, first_date)

png

milestones = [1000, 10000, 100000, 500000, 1000000]

countries = ['United States', 'Italy', 'France']#, 'United Kingdom', 'China', 'Germany']

plt.figure(figsize=(15,5))

for i, c in enumerate(countries):
    plt.subplot(1, len(countries), i+1 )
    plt.title(c)

    # Look the milestones up in this panel's own country, so each subplot
    # shows the country named in its title.
    cases = dfc[c]

    for n in milestones:
        reached = cases.index[cases >= n]
        if reached.empty:
            # This country never got to n in the data: nothing to plot.
            continue
        plt.scatter(n, reached[0], color='blue')

png

milestones = [1000, 10000, 100000, 500000, 1000000]

countries = ['United States', 'Italy', 'France']#, 'United Kingdom', 'China', 'Germany']

plt.figure(figsize=(15,5))

for i, c in enumerate(countries):
    plt.subplot(1, len(countries), i+1 )
    plt.title(c)

    # Look the milestones up in this panel's own country, so each subplot
    # shows the country named in its title.
    cases = dfc[c]

    for n in milestones:
        reached = cases.index[cases >= n]
        if reached.empty:
            # This country never got to n in the data: nothing to plot.
            continue
        # Month-day strings give short tick labels on the date axis.
        plt.scatter(n, reached[0].strftime('%m-%d'), color='blue')

png

Curve fitting

def func(x, a, b, c):
    """Exponential model ``a * exp(-b * x) + c`` evaluated at ``x``."""
    exponential = np.exp(-b * x)
    return a * exponential + c

from scipy.optimize import curve_fit

# Synthetic data: the model evaluated at known parameters (a=2.5, b=1.3,
# c=0.5), plus noise.
xdata = np.linspace(0, 10, 100)
y = func(xdata, 2.5, 1.3, 0.5)

# Centre the uniform noise on zero (range [-0.25, 0.25)). Uncentred noise in
# [0, 0.5) has mean 0.25, which the fit absorbs into the offset c instead of
# recovering the true value.
y_noise = 0.5 * (np.random.rand(xdata.size) - 0.5)

ydata = y + y_noise

plt.plot (xdata, ydata, 'o')

# popt holds the fitted (a, b, c); pcov is their estimated covariance.
popt, pcov = curve_fit(func, xdata, ydata)

print (popt)

plt.plot(xdata, func(xdata, *popt), color='red')
[2.4696733  1.31979388 0.75564748]





[<matplotlib.lines.Line2D at 0x11d580510>]

png

def sigmoid(x, L, x0, k):
    """Logistic curve ``L / (1 + exp(-k * (x - x0)))``.

    Parameters
    ----------
    x : scalar or array
        Points at which to evaluate the curve.
    L : float
        Upper asymptote (plateau height).
    x0 : float
        Midpoint, where the curve equals ``L / 2``.
    k : float
        Steepness of the rise.

    Returns
    -------
    The curve value at each ``x``.
    """
    # expit(z) is 1 / (1 + exp(-z)) evaluated without overflowing for large
    # negative z, which an optimiser can reach while exploring k and x0.
    from scipy.special import expit

    return L * expit(k * (x - x0))

# x is the row number (days since the first row); y is Italy's case total.
xx = np.arange(len(dfc.index))
yy = dfc['Italy']

plt.plot(xx, yy, 'o', label='Cases: data')

# Initial guess, each entry in the units of its own parameter (L, x0, k):
#   L  - plateau height: start from the largest total seen      (y units)
#   x0 - midpoint: first day at or above half of that total     (x units)
#   k  - steepness: a small positive starting value; the optimiser refines it
L_guess = yy.max()
x0_guess = xx[np.argmax(np.asarray(yy) >= L_guess / 2)]
p0 = [L_guess, x0_guess, 0.1]

popt, pcov = curve_fit(sigmoid, xx, yy, p0)

# *popt unpacks the fitted (L, x0, k) into the model call.
plt.plot(xx, sigmoid(xx, *popt), label='Fit')
plt.legend();

png

# For prediction: evaluate the fitted curve on days 0..179 and draw it over
# the observed points.
xnew = np.arange(180)

plt.plot(xx, yy, 'o', label='Cases: data')
plt.plot(xnew, sigmoid(xnew, *popt), label='Fit')
plt.legend();

png

Fitting the daily cases/deaths

# .diff() subtracts the previous row from each row, turning the running totals
# into day-to-day changes. The first row has no predecessor and becomes NaN.
dfd1 = dfd.diff()
dfc1 = dfc.diff()

dfd1.head()
World Afghanistan Albania Algeria Andorra Angola Anguilla Antigua and Barbuda Argentina Armenia ... United States Virgin Islands Uruguay Uzbekistan Vatican Venezuela Vietnam Western Sahara Yemen Zambia Zimbabwe
date
2019-12-31 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2020-01-01 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2020-01-02 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2020-01-03 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2020-01-04 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

5 rows × 210 columns

# Day-to-day changes again, with the NaN that .diff() leaves in the first row
# replaced by 0.
dfd1 = dfd.diff().fillna(0)
dfc1 = dfc.diff().fillna(0)

#dfd1.head()
dfd1.tail()
World Afghanistan Albania Algeria Andorra Angola Anguilla Antigua and Barbuda Argentina Armenia ... United States Virgin Islands Uruguay Uzbekistan Vatican Venezuela Vietnam Western Sahara Yemen Zambia Zimbabwe
date
2020-05-05 3999.0 5.0 0.0 2.0 0.0 0.0 0.0 0.0 14.0 4.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2020-05-06 5952.0 5.0 0.0 5.0 1.0 0.0 0.0 0.0 4.0 1.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0
2020-05-07 6387.0 9.0 0.0 6.0 0.0 0.0 0.0 0.0 9.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 0.0 0.0
2020-05-08 5748.0 2.0 0.0 7.0 1.0 0.0 0.0 0.0 9.0 2.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0
2020-05-09 5041.0 3.0 0.0 5.0 0.0 0.0 0.0 0.0 11.0 1.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0

5 rows × 210 columns

plt.plot(dfd1.index, dfd1['United Kingdom'], 'o')
[<matplotlib.lines.Line2D at 0x11dd61110>]

png

def Gaussian(x, x0, sigma, a):
    """Bell curve ``a * exp(-(x - x0)**2 / (2 * sigma**2))`` evaluated at ``x``."""
    offset = x - x0
    return a * np.exp(-offset**2 / (2 * sigma**2))

# y is the UK's day-to-day change in deaths; x is the matching row number,
# sized from the same series so the two always have equal length.
yy = dfd1['United Kingdom']
xx = np.arange(len(yy))

plt.scatter(xx, yy)

# Initial guess, each entry in the units of its own parameter (x0, sigma, a):
#   x0    - centre: the day with the highest count              (x units)
#   sigma - width: spread of the days, weighted by their counts (x units)
#   a     - height: the highest count                           (y units)
# Negative day-to-day changes are clipped to 0 so they cannot act as
# negative weights.
weights = np.clip(np.asarray(yy, dtype=float), 0, None)
x0_guess = xx[np.argmax(weights)]
sigma_guess = np.sqrt(np.average((xx - x0_guess)**2, weights=weights))
# Floor of one day keeps the starting sigma away from zero.
p0 = [x0_guess, max(sigma_guess, 1.0), weights.max()]

popt, pcov = curve_fit(Gaussian, xx, yy, p0)
plt.plot(xx, Gaussian(xx, *popt), color='red')
plt.title('United Kingdom')
Text(0.5, 1.0, 'United Kingdom')

png

countries = ['United States', 'Italy', 'France']#, 'United Kingdom', 'China', 'Germany']

plt.figure(figsize=(15,5))

# The x-axis (row number) is the same for every country, so build it once,
# sized from the frame the y-values are taken from.
xx = np.arange(len(dfd1.index))

for i, c in enumerate(countries):
    plt.subplot(1, len(countries), i+1 )
    plt.title(c)

    yy = dfd1[c]

    plt.scatter(xx, yy)

    # Initial guess, each entry in the units of its own parameter
    # (x0, sigma, a): day of the highest count, count-weighted spread of the
    # days, and the highest count. Negative day-to-day changes are clipped to
    # 0 so they cannot act as negative weights.
    weights = np.clip(np.asarray(yy, dtype=float), 0, None)
    x0_guess = xx[np.argmax(weights)]
    sigma_guess = np.sqrt(np.average((xx - x0_guess)**2, weights=weights))
    # Floor of one day keeps the starting sigma away from zero.
    p0 = [x0_guess, max(sigma_guess, 1.0), weights.max()]

    popt, pcov = curve_fit(Gaussian, xx, yy, p0)
    plt.plot(xx, Gaussian(xx, *popt), color='red')

png