bi-sep/electricity_kenya.ipynb

231 lines
75 KiB
Text
Raw Permalink Normal View History

2024-11-17 22:37:14 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 29,
"id": "c5fc2bff-487f-456d-972e-b54c3b6b8dab",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Country analysed throughout the notebook; change it here to rerun for another country.\n",
"COUNTRY = 'Kenya'\n",
"\n",
"# GDP per capita: in this CSV the column holding the country names is headed by\n",
"# the indicator title, so it is renamed to 'Country' before filtering.\n",
"gdp_data = pd.read_csv('imf_gdp_capita.csv').rename(columns={'GDP per capita, current prices\\n (U.S. dollars per capita)':'Country'})\n",
"gdp_data = gdp_data[gdp_data['Country'] == COUNTRY]\n",
"\n",
"# Reshape from one column per year to one row per year, then make both columns numeric.\n",
"gdp_data = pd.melt(gdp_data, id_vars=['Country'], var_name='Year', value_name='GDP_per_capita')\n",
"gdp_data['Year'] = pd.to_numeric(gdp_data['Year'])\n",
"gdp_data['GDP_per_capita'] = pd.to_numeric(gdp_data['GDP_per_capita'])\n",
"\n",
"# Share of the population with access to electricity.\n",
"el_access_data = pd.read_csv('share-of-the-population-with-access-to-electricity.csv').rename(columns={'Entity':'Country', 'Access to electricity (% of population)':'electricity_access'})\n",
"el_access_data = el_access_data[el_access_data['Country'] == COUNTRY].copy()\n",
"\n",
"# Electricity generation mix by source.\n",
"electricity_data = pd.read_csv('share-elec-by-source.csv').rename(columns={'Entity':'Country'})\n",
"# .copy() makes the filtered rows an independent frame, so the column assignments\n",
"# below do not write into a slice of the full table (SettingWithCopyWarning).\n",
"electricity_data = electricity_data[electricity_data['Country'] == COUNTRY].copy()\n",
"\n",
"renewable_columns = ['Hydro - % electricity', 'Solar - % electricity',\n",
"                     'Wind - % electricity', 'Other renewables excluding bioenergy - % electricity']\n",
"non_renewable_columns = ['Coal - % electricity', 'Gas - % electricity',\n",
"                         'Oil - % electricity', 'Nuclear - % electricity']\n",
"\n",
"# NOTE(review): no bioenergy column appears in either group - confirm whether the\n",
"# dataset has one and whether leaving it out of both totals is intended.\n",
"electricity_data['Renewable'] = electricity_data[renewable_columns].sum(axis=1)\n",
"electricity_data['Non-renewable'] = electricity_data[non_renewable_columns].sum(axis=1)\n"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "9cf6077e-536d-43e7-8663-c9087b8af7f2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Pearson Correlation with GDP Growth:\n",
" electricity_access Renewable GDP_growth\n",
"electricity_access 1.000000 0.834904 0.048234\n",
"Renewable 0.834904 1.000000 0.124918\n",
"GDP_growth 0.048234 0.124918 1.000000\n",
"\n",
"Regression Results with GDP Growth as Control:\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: Renewable R-squared: 0.704\n",
"Model: OLS Adj. R-squared: 0.673\n",
"Method: Least Squares F-statistic: 22.62\n",
"Date: Sun, 17 Nov 2024 Prob (F-statistic): 9.42e-06\n",
"Time: 21:29:20 Log-Likelihood: -73.771\n",
"No. Observations: 22 AIC: 153.5\n",
"Df Residuals: 19 BIC: 156.8\n",
"Df Model: 2 \n",
"Covariance Type: nonrobust \n",
"======================================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"--------------------------------------------------------------------------------------\n",
"const 48.4419 3.787 12.791 0.000 40.515 56.368\n",
"electricity_access 0.5613 0.084 6.651 0.000 0.385 0.738\n",
"GDP_growth 0.1767 0.260 0.679 0.505 -0.368 0.721\n",
"==============================================================================\n",
"Omnibus: 0.546 Durbin-Watson: 1.538\n",
"Prob(Omnibus): 0.761 Jarque-Bera (JB): 0.060\n",
"Skew: 0.121 Prob(JB): 0.970\n",
"Kurtosis: 3.083 Cond. No. 101.\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
]
}
],
"source": [
"import statsmodels.api as sm\n",
"from statsmodels.stats.outliers_influence import variance_inflation_factor\n",
"\n",
"# Combine the three yearly series on Year. Left joins keep every GDP year even\n",
"# when another series has no value for it; such rows are dropped further down.\n",
"merged_data = pd.merge(gdp_data, el_access_data[['Year', 'electricity_access']], on='Year', how='left')\n",
"merged_data = pd.merge(merged_data, electricity_data[['Year', 'Renewable']], on='Year', how='left')\n",
"\n",
"# pct_change compares each row with the row before it, so the rows must be in\n",
"# chronological order for the result to be a year-on-year figure.\n",
"merged_data = merged_data.sort_values('Year')\n",
"\n",
"# Year-on-year growth of GDP per capita, in percent. fill_method=None leaves a\n",
"# missing GDP value as NaN instead of forward-filling it before the comparison.\n",
"merged_data['GDP_growth'] = merged_data['GDP_per_capita'].pct_change(fill_method=None) * 100\n",
"\n",
"# Keep only the years where all three variables are present. This also removes\n",
"# the first GDP year, whose growth rate is undefined.\n",
"merged_data = merged_data.dropna(subset=['electricity_access', 'Renewable', 'GDP_growth'])\n",
"\n",
"\n",
"# Step 1: Pearson correlation between the three variables\n",
"correlation = merged_data[['electricity_access', 'Renewable', 'GDP_growth']].corr()\n",
"print(\"Pearson Correlation with GDP Growth:\")\n",
"print(correlation)\n",
"\n",
"# Step 2: OLS of the renewable share on electricity access, with GDP growth as a control\n",
"X = merged_data[['electricity_access', 'GDP_growth']]\n",
"y = merged_data['Renewable']\n",
"\n",
"# statsmodels does not add an intercept on its own, so add the constant column explicitly\n",
"X = sm.add_constant(X)\n",
"\n",
"# Fit the model\n",
"model = sm.OLS(y, X).fit()\n",
"\n",
"# Print regression results\n",
"print(\"\\nRegression Results with GDP Growth as Control:\")\n",
"print(model.summary())"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "76beea43-efc1-4184-92dc-2c8e7285bf8a",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1MAAAIoCAYAAACMKrEqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAADCGElEQVR4nOzdd3jT1f4H8HfSke5FoYNZsIwyW/Yus4wWFecFr+ICwcVFL+hVEVBZIi4c4PUyHPi7jqu0gOw9BBTZllWGZdO90pHz++OYtGnSNmmz+349Tx8l32+aT9M0yTvnnM9RCCEEiIiIiIiIyCxKexdARERERETkjBimiIiIiIiIaoFhioiIiIiIqBYYpoiIiIiIiGqBYYqIiIiIiKgWGKaIiIiIiIhqgWGKiIiIiIioFhimiIiIiIiIaoFhioiIiIiIqBYYpogciEKhgEKhsHcZddaiRQsoFApcuHChTt9nxYoVUCgUmDBhgkXqIn3bt2+HQqFAfHy8vUtxSGVlZZg7dy7atGkDT09P3lcOoC6PWWd4fnWGGk3hiq8BFy5cgEKhQIsWLexdCjkYhikn0LFjRygUCnh7eyMnJ8fe5TiV7du3Y9asWdi+fbtNb1f7gljTl63rqsl7772HWbNmISsry96lVCsrKwuzZs3Ce++9Z+9SHMqGDRswduxYNG7cGJ6enggKCkKbNm2QlJSEd955B+fOnbN3iU5l5syZeOWVV3DhwgV06NABffv2RceOHWu8nvYNYMUvpVKJ4OBg9O7dG4sWLUJRUZENfgKyF74GWFd9eA2Ij4+HQqHArFmzjB5Xq9UYNWoUFAoFwsLCcPz4cdsWSDru9i6Aqvf777/r/kCKiorw3Xff4bHHHrNzVc5j+/btmD17NgDY5RPlDh06IDAwsMrj1R2zh/feew8XL17EhAkTEBQUVOvv06pVK3h5ecHDw6NO9QQGBqJNmzaIiIjQuzwrKwuzZ89G8+bNMXXq1Drdhqt45pln8NFHHwEAfH19ER0dDR8fH1y8eBEpKSlISUnB1atXsWjRIjtX6hyEEPj000+hUCiwZ88edOvWzezvoVKpdNcrKyvDpUuXsH//fuzfvx+rV6/G9u3b4e/vb+nSyYHwNcB1XgM8PDzQpk0bNG7c2Ca3V52ioiLcfffd+PnnnxEeHo6tW7eiXbt29i6r3mKYcnBffPEFACAoKAhZWVn44osvGKacyIcfflgvpwVt2bLFIt/n7rvvxt13322R7+XKVq9ejY8++ghKpRLvvvsuJk2aBJVKpTt+8uRJfP311wgICLBjlc7l5s2byMjIQKNGjWoVpAAgPDwcu3fv1rtsw4YNuOeee/Dbb79h/vz5eOuttyxRLjkovgbUjSO9BjRu3Bh//PGHvctAYWEh7rrrLmzcuBGRkZHYunUr2rRpY++y6jVO83NgZWVlWL16NQBgyZIlcHNzw44dO3Dp0iU7V0ZEjmTlypUAgMceewzPPfecXpACgJiYGLz55puYPn26PcpzSoWFhQAAb29vi37fhIQE/OMf/wAA/PDDDxb93kTk2goLC5GUlISNGzeicePG2L59O4OUA2CYcmCbN2/G1atXER4ejgcffBCDBw+GEAJfffVVtdcrKCjAokWL0KtXLwQFBcHHxwfR0dH4+9//jh07dhicL4TAt99+i1GjRqFRo0ZQqVRo1qwZRo4ciRUrVhi9jQMHDuDBBx/Urc0ICwvDfffdh8OHDxs9/+LFi5g0aRJatmwJlUoFf39/tGzZEnfffTe++eYbg/OTk5ORkJCA0NBQeHh4oGHDhujUqROeffZZnDp1quY7D3LOunaK3+zZs/XmqVdezJqfn48333wTnTp1gq+vLwICAtCzZ0989NFHKC0tNen2rE0IgW+++QbDhg1DgwYNoFKp0LJlSzz33HO4du1aldfLyMjA66+/jtjYWAQEBMDPzw/t2rXDU089pft9add4XLx4EQAQFRVldF5/xc
XfpaWlWLhwITp27AgfHx+9Rbk1LT7etGkTxo4di8jISKhUKkRGRmLQoEH46KOPoFardecZW3w8YcIEREVFAZCPq8prEADgwQcfhEKhwDvvvFPl/fLdd99BoVCge/fuVZ6jtWTJEigUCiQmJlZ5TkZGBjw9PeHh4YHbt2/rLj9+/DjGjx+Ppk2b6tYxRUdHY9y4cfj5559rvG1TnD9/HgDQpUuXWn8PjUaD999/Hx06dICXlxfCwsLw+OOP4+bNm0bP37RpE5555hl07twZISEh8PLyQqtWrTB58uQqP/CZMGECFAoFVqxYgbS0NEyYMAGNGzeGu7u73rqA2j7Wq3Pp0iVMnjwZUVFRUKlUCA0NxciRI7F+/XqDcysuMq/8GLPEGhftY66qv4+MjAy88sor6NChA3x9feHv749evXrhs88+g0ajMTi/4v165coVPPbYY4iIiICXlxfat2+vm/5ZFVOfzzUaDUJCQuDm5obMzEy9Y/v379fdR59//rnBbRh7Tjh//jwWLFiA+Ph4NG3aFCqVCg0bNsSIESOwdu3aamsGgJKSEsyePRutW7eGl5cXGjdujKeffhoZGRk1XrcyazzmLI2vAZI9XgOA6htQVLzt9evXY8CAAfD390dgYCBGjhxZ5XsjcxQUFGD06NHYsmULmjZtih07diA6OtrouX/++Seee+45tG7dGt7e3ggKCsKgQYPw3XffGT1fuz5r+/bt+OOPP3DfffchNDQU3t7e6Nq1K/773//qnV9WVoYmTZpAoVDg119/rbLmZ555BgqFAv/85z91l2VlZeHzzz/HnXfeiTvuuAPe3t4IDAxEz5498cEHHzjMey6zCHJY48aNEwDE888/L4QQYsWKFQKAaNeuXZXXuXjxomjXrp0AIACI6OhoERcXJ0JCQgQAMXDgQL3z1Wq1uPvuu3XnR0REiO7du4vGjRsLhUIhjD1EFi9erDsWEhIiYmNjRYMGDQQA4eHhIb7//nu989PS0kRoaKgAIHx8fETHjh1Fly5ddDV17txZ7/wPP/xQV094eLjo1q2biI6OFl5eXgKAePfdd026//r27SuaNm0qAIimTZuKvn376r7eeust3Xk3btwQHTt2FACEUqkUnTp10rsPhw0bJgoLC026TS3tdbdt21ar61VWXFws7rvvPt3xyMhI0blzZ+Hj46P7vaWmphpc7/fffxeRkZG6ny0mJkZ06dJFBAQECADikUceEUIIsW7dOtG3b1+hUqkEANGtWze9++u3334TQgixbds2AUAMGDBAjB49WgAQrVq1El27dhXt27fX3W7z5s0FAJGWlmZQ09NPP637ORo0aCC6desmmjdvLpRKpcF1li9frlenEEK89dZbolu3bgKAUKlUenX27dtXCCHEhg0bBADRsWPHKu/rxMREAUAsWbKkul+JEEI+Rtzd3YWHh4e4ffu20XOWLl0qAIhRo0bpLvvll1+Et7e3ACACAwNF586dRYcOHURgYKAAIO68884ab9sUXbt2FQDEww8/bNb1tL/PgQMH6p5voqOjRfv27YW7u7sAINq3by+KiooMruvm5iYUCoVo1KiR6NKli+jQoYPw9fXV/V5PnDhhcJ1HHnlEABAvvfSSCAoKEiqVSsTFxYm2bduKWbNmCSFq/1ivzv79+0VQUJAAIHx9fUXXrl1FkyZNdLfx2muv6Z3ft2/fKh9j2r+F6mgft82bNzd6fPXq1brnz8qOHz8uGjduLAAIT09PERMTI1q1aqV7zr333nuFRqPRu472fp01a5YIDw8XXl5eIi4uTve3D0C8+eabRmsx9/lc+3ezZs0avcsXLFigu62///3vescuXrwoAIhmzZrpXf74448LAMLPz0+0bt1adOvWTUREROi+z/z58w3qNfYcFB0dLbp06aJ7zN5xxx3i+vXrBte19PNrdfga4FqvAULI9zJV/V1rf55PPvlEKBQKERERIeLi4nTPiX5+fuLUqVMm3Y7WwIEDBQ
Dx+uuvi7y8PBEfH6+7/fPnz1d5ve3bt+teY7y9vUXHjh1174UAiBdeeKHK21q0aJHw8/MT/v7+omvXrqJhw4a6633
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"# Draw on an explicit figure/axes pair instead of pyplot's implicit current figure\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"\n",
"# Yearly observations as blue points, fitted linear trend as a red line\n",
"point_style = {'color': 'blue', 'label': 'Data points'}\n",
"trend_style = {'color': 'red', 'linewidth': 2, 'label': 'Regression line'}\n",
"sns.regplot(\n",
"    data=merged_data, x='electricity_access', y='Renewable',\n",
"    scatter_kws=point_style, line_kws=trend_style, ax=ax\n",
")\n",
"\n",
"# Title, axis labels and legend\n",
"ax.set_title('Access to Electricity vs. Share of Renewable Electricity in Kenya', fontsize=16)\n",
"ax.set_xlabel('Access to Electricity (% of Population)', fontsize=12)\n",
"ax.set_ylabel('Share of Renewable Electricity (%)', fontsize=12)\n",
"ax.legend()\n",
"\n",
"# Write the figure to disk before displaying it\n",
"fig.savefig('regression.png', dpi=300, bbox_inches='tight')\n",
"\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "0aa2149a-7bbc-4454-890a-5bcb801fb4bb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\\begin{tabular}{rrrrr}\n",
"\\toprule\n",
"Year & GDP per capita & Population percentage with access to electricity & Share of electricity produced from renewable sources & GDP growth rate \\\\\n",
"\\midrule\n",
"2000 & 617.139000 & 15.175694 & 40.371230 & -4.745007 \\\\\n",
"2001 & 617.047000 & 17.048136 & 59.459465 & -0.014908 \\\\\n",
"2002 & 611.893000 & 18.912030 & 67.572815 & -0.835269 \\\\\n",
"2003 & 668.475000 & 16.000000 & 73.357664 & 9.247042 \\\\\n",
"2004 & 692.709000 & 22.642206 & 62.824677 & 3.625266 \\\\\n",
"2005 & 778.323000 & 24.522501 & 59.701494 & 12.359302 \\\\\n",
"2006 & 854.981000 & 26.422052 & 56.591209 & 9.849124 \\\\\n",
"2007 & 1028.226000 & 28.342442 & 67.016490 & 20.263023 \\\\\n",
"2008 & 1118.755000 & 30.280056 & 65.040647 & 8.804387 \\\\\n",
"2009 & 1123.268000 & 23.000000 & 53.435117 & 0.403395 \\\\\n",
"2010 & 1176.311000 & 19.200000 & 67.877096 & 4.722203 \\\\\n",
"2011 & 1178.599000 & 36.157864 & 63.218392 & 0.194506 \\\\\n",
"2012 & 1396.220000 & 38.125990 & 71.725830 & 18.464380 \\\\\n",
"2013 & 1490.422000 & 40.092150 & 72.524755 & 6.746931 \\\\\n",
"2014 & 1613.101000 & 36.000000 & 71.304350 & 8.231159 \\\\\n",
"2015 & 1625.176000 & 41.600000 & 85.015288 & 0.748558 \\\\\n",
"2016 & 1688.852000 & 53.100000 & 83.884296 & 3.918099 \\\\\n",
"2017 & 1805.398000 & 55.831993 & 74.422903 & 6.900901 \\\\\n",
"2018 & 1987.302000 & 61.180614 & 85.093162 & 10.075562 \\\\\n",
"2019 & 2107.735000 & 69.700000 & 86.919106 & 6.060126 \\\\\n",
"2020 & 2067.987000 & 71.492714 & 92.327584 & -1.885816 \\\\\n",
"2021 & 2208.691000 & 76.542450 & 90.057995 & 6.803911 \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\n"
]
}
],
"source": [
"# Human-readable column headings used in the exported LaTeX table\n",
"latex_headers = {\n",
"    'GDP_per_capita': 'GDP per capita',\n",
"    'electricity_access': 'Population percentage with access to electricity',\n",
"    'GDP_growth': 'GDP growth rate',\n",
"    'Renewable': 'Share of electricity produced from renewable sources',\n",
"}\n",
"\n",
"# Drop the constant Country column, relabel the rest and render without the row index\n",
"latex_table = merged_data.drop(columns=['Country']).rename(columns=latex_headers).to_latex(index=False)\n",
"print(latex_table)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}