bi-sep/.ipynb_checkpoints/electricity_kenya-checkpoint.ipynb

254 lines
78 KiB
Text
Raw Normal View History

2024-11-17 22:37:14 +01:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 29,
"id": "c5fc2bff-487f-456d-972e-b54c3b6b8dab",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Country analysed in this notebook; every dataset loaded below is filtered to it.\n",
"COUNTRY = 'Kenya'\n",
"\n",
"# GDP per capita: the column named after the indicator title is renamed to\n",
"# 'Country' because rows are selected by country name on it.\n",
"gdp_data = pd.read_csv('imf_gdp_capita.csv').rename(columns={'GDP per capita, current prices\\n (U.S. dollars per capita)':'Country'})\n",
"gdp_data = gdp_data[gdp_data[\"Country\"] == COUNTRY]\n",
"\n",
"# Reshape from one column per year to one row per year, then make both\n",
"# the year and the value numeric.\n",
"gdp_data = pd.melt(gdp_data, id_vars=['Country'], var_name='Year', value_name='GDP_per_capita')\n",
"gdp_data['Year'] = pd.to_numeric(gdp_data['Year'])\n",
"gdp_data['GDP_per_capita'] = pd.to_numeric(gdp_data['GDP_per_capita'])\n",
"\n",
"# Share of the population with access to electricity.\n",
"el_access_data = pd.read_csv('share-of-the-population-with-access-to-electricity.csv').rename(columns={'Entity':'Country', 'Access to electricity (% of population)':'electricity_access'})\n",
"el_access_data = el_access_data[el_access_data[\"Country\"] == COUNTRY]\n",
"\n",
"# Electricity generation mix by source.\n",
"electricity_data = pd.read_csv('share-elec-by-source.csv').rename(columns={'Entity':'Country'})\n",
"# .copy() gives an independent frame, so the two columns added below are not\n",
"# written onto a filtered slice of the full table (SettingWithCopyWarning).\n",
"electricity_data = electricity_data[electricity_data[\"Country\"] == COUNTRY].copy()\n",
"\n",
"# NOTE(review): no bioenergy column appears in either list below - confirm\n",
"# that leaving it out of both totals is intended.\n",
"renewable_columns = ['Hydro - % electricity', 'Solar - % electricity', \n",
" 'Wind - % electricity', 'Other renewables excluding bioenergy - % electricity']\n",
"non_renewable_columns = ['Coal - % electricity', 'Gas - % electricity', \n",
" 'Oil - % electricity', 'Nuclear - % electricity']\n",
"\n",
"# Row-wise totals of the listed shares (DataFrame.sum skips NaN by default).\n",
"electricity_data['Renewable'] = electricity_data[renewable_columns].sum(axis=1)\n",
"electricity_data['Non-renewable'] = electricity_data[non_renewable_columns].sum(axis=1)\n"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "9cf6077e-536d-43e7-8663-c9087b8af7f2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Country Year GDP_per_capita electricity_access Renewable GDP_growth\n",
"20 Kenya 2000 617.139 15.175694 40.371230 -4.745007\n",
"21 Kenya 2001 617.047 17.048136 59.459465 -0.014908\n",
"22 Kenya 2002 611.893 18.912030 67.572815 -0.835269\n",
"23 Kenya 2003 668.475 16.000000 73.357664 9.247042\n",
"24 Kenya 2004 692.709 22.642206 62.824677 3.625266\n",
"25 Kenya 2005 778.323 24.522501 59.701494 12.359302\n",
"26 Kenya 2006 854.981 26.422052 56.591209 9.849124\n",
"27 Kenya 2007 1028.226 28.342442 67.016490 20.263023\n",
"28 Kenya 2008 1118.755 30.280056 65.040647 8.804387\n",
"29 Kenya 2009 1123.268 23.000000 53.435117 0.403395\n",
"30 Kenya 2010 1176.311 19.200000 67.877096 4.722203\n",
"31 Kenya 2011 1178.599 36.157864 63.218392 0.194506\n",
"32 Kenya 2012 1396.220 38.125990 71.725830 18.464380\n",
"33 Kenya 2013 1490.422 40.092150 72.524755 6.746931\n",
"34 Kenya 2014 1613.101 36.000000 71.304350 8.231159\n",
"35 Kenya 2015 1625.176 41.600000 85.015288 0.748558\n",
"36 Kenya 2016 1688.852 53.100000 83.884296 3.918099\n",
"37 Kenya 2017 1805.398 55.831993 74.422903 6.900901\n",
"38 Kenya 2018 1987.302 61.180614 85.093162 10.075562\n",
"39 Kenya 2019 2107.735 69.700000 86.919106 6.060126\n",
"40 Kenya 2020 2067.987 71.492714 92.327584 -1.885816\n",
"41 Kenya 2021 2208.691 76.542450 90.057995 6.803911\n",
"Pearson Correlation with GDP Growth:\n",
" electricity_access Renewable GDP_growth\n",
"electricity_access 1.000000 0.834904 0.048234\n",
"Renewable 0.834904 1.000000 0.124918\n",
"GDP_growth 0.048234 0.124918 1.000000\n",
"\n",
"Regression Results with GDP Growth as Control:\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: Renewable R-squared: 0.704\n",
"Model: OLS Adj. R-squared: 0.673\n",
"Method: Least Squares F-statistic: 22.62\n",
"Date: Sun, 17 Nov 2024 Prob (F-statistic): 9.42e-06\n",
"Time: 19:14:23 Log-Likelihood: -73.771\n",
"No. Observations: 22 AIC: 153.5\n",
"Df Residuals: 19 BIC: 156.8\n",
"Df Model: 2 \n",
"Covariance Type: nonrobust \n",
"======================================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"--------------------------------------------------------------------------------------\n",
"const 48.4419 3.787 12.791 0.000 40.515 56.368\n",
"electricity_access 0.5613 0.084 6.651 0.000 0.385 0.738\n",
"GDP_growth 0.1767 0.260 0.679 0.505 -0.368 0.721\n",
"==============================================================================\n",
"Omnibus: 0.546 Durbin-Watson: 1.538\n",
"Prob(Omnibus): 0.761 Jarque-Bera (JB): 0.060\n",
"Skew: 0.121 Prob(JB): 0.970\n",
"Kurtosis: 3.083 Cond. No. 101.\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
]
}
],
"source": [
"import statsmodels.api as sm\n",
"from statsmodels.stats.outliers_influence import variance_inflation_factor\n",
"\n",
"# Merge the three series on Year; left joins keep every year of the GDP series\n",
"merged_data = pd.merge(gdp_data, el_access_data[['Year', 'electricity_access']], on='Year', how='left')\n",
"merged_data = pd.merge(merged_data, electricity_data[['Year', 'Renewable']], on='Year', how='left')\n",
"\n",
"# Year-on-year growth of GDP per capita, in percent.\n",
"# Sort by Year so each row is compared with the preceding year, and pass\n",
"# fill_method=None so a missing GDP value yields NaN instead of being\n",
"# forward-filled by the deprecated pandas default.\n",
"merged_data = merged_data.sort_values('Year')\n",
"merged_data['GDP_growth'] = merged_data['GDP_per_capita'].pct_change(fill_method=None) * 100\n",
"\n",
"# Keep only the years where all three analysis variables are present.\n",
"# Growth is computed on the full GDP series before this filter, so a year is\n",
"# not lost merely because the preceding year lacks electricity data.\n",
"merged_data = merged_data.dropna(subset=['electricity_access', 'Renewable', 'GDP_growth'])\n",
"\n",
"\n",
"# Step 1: Pairwise Pearson correlations between the three variables\n",
"correlation = merged_data[['electricity_access', 'Renewable', 'GDP_growth']].corr()\n",
"print(\"Pearson Correlation with GDP Growth:\")\n",
"print(correlation)\n",
"\n",
"# Step 2: OLS of the renewable share on electricity access, with GDP growth as a control\n",
"X = merged_data[['electricity_access', 'GDP_growth']]\n",
"y = merged_data['Renewable']\n",
"\n",
"# statsmodels does not add an intercept on its own, so add the constant column\n",
"X = sm.add_constant(X)\n",
"\n",
"# Fit the regression model\n",
"model = sm.OLS(y, X).fit()\n",
"\n",
"# Print regression results\n",
"print(\"\\nRegression Results with GDP Growth as Control:\")\n",
"print(model.summary())"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "76beea43-efc1-4184-92dc-2c8e7285bf8a",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1MAAAIoCAYAAACMKrEqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAADFhklEQVR4nOzdd3zT1foH8E/Ske5FoYM9SqFQoOxddiltVdwXvYJeBcHFRS/qVRFQERBwoYheLyAi/q7bFpBdNjJFlmXKLgW6Z9rm/P44JmmalCZt0iTt5/169QXN+SZ5MprkyXPOcxRCCAEiIiIiIiKyiNLeARARERERETkjJlNEREREREQ1wGSKiIiIiIioBphMERERERER1QCTKSIiIiIiohpgMkVERERERFQDTKaIiIiIiIhqgMkUERERERFRDTCZIiIiIiIiqgEmU0QORKFQQKFQ2DuMWmvVqhUUCgX+/PPPWl3O8uXLoVAoMGHCBKvERYZSU1OhUCgwZMgQe4fikMrLyzFnzhxERkbC3d2d95UDqM1z1hleX50hRnPUx/eAP//8EwqFAq1atbJ3KORgmEw5gejoaCgUCnh6eiI3N9fe4TiV1NRUzJw5E6mpqXV6vdo3xOp+6jqu6rz33nuYOXMmsrOz7R3KbWVnZ2PmzJl477337B2KQ1m/fj3uvvtuNG3aFO7u7ggICEBkZCSSkpKwcOFCnD171t4hOpUZM2bglVdewZ9//onOnTtjwIABiI6OrvZ82g+AFX+USiUCAwPRr18/LFiwAMXFxXVwC8he+B5gWw3hPWDIkCFQKBSYOXOmyfGSkhKMGTMGCoUCISEhOHbsWN0GSDqu9g6Abu+3337T/YEUFxfj22+/xWOPPWbnqJxHamoqZs2aBQB2+Ua5c+fO8Pf3r3L8dmP28N577+HChQuYMGECAgICanw5bdu2hYeHB9zc3GoVj7+/PyIjIxEWFmZwenZ2NmbNmoWWLVti6tSptbqO+uLpp5/GRx99BADw9vZGREQEvLy8cOHCBaSkpCAlJQXXrl3DggUL7BypcxBC4JNPPoFCocCuXbvQs2dPiy9DpVLpzldeXo6LFy9i79692Lt3L1avXo3U1FT4+vpaO3RyIHwPqD/vAW5uboiMjETTpk3r5Ppup7i4GGPHjsUvv/yC0NBQbNmyBR07drR3WA0WkykHt3LlSgBAQEAAsrOzsXLlSiZTTuTDDz9skNOCNm/ebJXLGTt2LMaOHWuVy6rPVq9ejY8++ghKpRLvvvsuJk2aBJVKpRs/ceIEvvrqK/j5+dkxSudy48YNZGZmokmTJjVKpAAgNDQUO3fuNDht/fr1uOeee3Do0CHMnTsXb731ljXCJQfF94DacaT3gKZNm+KPP/6wdxgoKirCXXfdhQ0bNiA8PBxbtmxBZGSkvcNq0DjNz4GVl5dj9erVAIDFixfDxcUF27Ztw8WLF+0cGRE5khUrVgAAHnvsMTz77LMGiRQAREVF4c0338T06dPtEZ5TKioqAgB4enpa9XLj4uLwz3/+EwDw/fffW/Wyiah+KyoqQlJSEjZs2ICmTZsiNTWViZQDYDLlwDZt2oRr164hNDQUDz74IIYNGwYhBFatWnXb8xUWFmLBggXo27cvAgIC4OXlhYiICPz973/Htm3bjI4XQuCbb77BmDFj0KRJE6hUKrRo0QLx8fFYvny5yevYt28fHnzwQd3ajJCQENx33304fPiwyeMvXLiASZMmoU2bNlCpVPD19UWbNm0wduxYfP3110bHJycnIy4uDsHBwXBzc0Pjxo3RpUsXPPPMMzh58mT1dx7knHXtFL9Zs2YZzFOvvJi1oKAAb775Jrp06QJvb2/4+fmhT58++Oijj1BWVmbW9dmaEAJff/01Ro4ciUaNGkGlUqFNmzZ49tlnkZ6eXuX5MjMz8frrryMmJgZ+fn7w8fFBx44d8eSTT+oeL+0ajwsXLgAAWrdubXJef8XF32
VlZZg/fz6io6Ph5eVlsCi3usXHGzduxN13343w8HCoVCqEh4dj6NCh+Oijj1BSUqI7ztTi4wkTJqB169YA5POq8hoEAHjwwQehUCiwcOHCKu+Xb7/9FgqFAr169aryGK3FixdDoVAgMTGxymMyMzPh7u4ONzc33Lp1S3f6sWPH8NBDD6F58+a6dUwREREYN24cfvnll2qv2xznzp0DAHTr1q3Gl6HRaPD++++jc+fO8PDwQEhICP7xj3/gxo0bJo/fuHEjnn76aXTt2hVBQUHw8PBA27ZtMXny5Cq/8JkwYQIUCgWWL1+O8+fPY8KECWjatClcXV0N1gXU9Ll+OxcvXsTkyZPRunVrqFQqBAcHIz4+HuvWrTM6tuIi88rPMWuscdE+56r6+8jMzMQrr7yCzp07w9vbG76+vujbty8+++wzaDQao+Mr3q9Xr17FY489hrCwMHh4eKBTp0666Z9VMff1XKPRICgoCC4uLsjKyjIY27t3r+4++vzzz42uw9Rrwrlz5zBv3jwMGTIEzZs3h0qlQuPGjTF69GisWbPmtjEDQGlpKWbNmoX27dvDw8MDTZs2xVNPPYXMzMxqz1uZLZ5z1sb3AMke7wHA7RtQVLzudevWYfDgwfD19YW/vz/i4+Or/GxkicLCQiQkJGDz5s1o3rw5tm3bhoiICJPHXr58Gc8++yzat28PT09PBAQEYOjQofj2229NHq9dn5Wamoo//vgD9913H4KDg+Hp6YkePXrgf//7n8Hx5eXlaNasGRQKBQ4ePFhlzE8//TQUCgX+9a9/6U7Lzs7G559/jjvvvBPt2rWDp6cn/P390adPH3zwwQcO85nLIoIc1rhx4wQA8dxzzwkhhFi+fLkAIDp27FjleS5cuCA6duwoAAgAIiIiQnTv3l0EBQUJACI2Ntbg+JKSEjF27Fjd8WFhYaJXr16iadOmQqFQCFNPkUWLFunGgoKCRExMjGjUqJEAINzc3MR3331ncPz58+dFcHCwACC8vLxEdHS06Natmy6mrl27Ghz/4Ycf6uIJDQ0VPXv2FBEREcLDw0MAEO+++65Z99+AAQNE8+bNBQDRvHlzMWDAAN3PW2+9pTsuIyNDREdHCwBCqVSKLl26GNyHI0eOFEVFRWZdp5b2vFu3bq3R+SpTq9Xivvvu042Hh4eLrl27Ci8vL93jlpaWZnS+3377TYSHh+tuW1RUlOjWrZvw8/MTAMT48eOFEEKsXbtWDBgwQKhUKgFA9OzZ0+D+OnTokBBCiK1btwoAYvDgwSIhIUEAEG3bthU9evQQnTp10l1vy5YtBQBx/vx5o5ieeuop3e1o1KiR6Nmzp2jZsqVQKpVG51m2bJlBnEII8dZbb4mePXsKAEKlUhnEOWDAACGEEOvXrxcARHR0dJX3dWJiogAgFi9efLuHRAghnyOurq7Czc1N3Lp1y+QxS5cuFQDEmDFjdKf9+uuvwtPTUwAQ/v7+omvXrqJz587C399fABB33nlntddtjh49eggA4pFHHrHofNrHMzY2Vvd6ExERITp16iRcXV0FANGpUydRXFxsdF4XFxehUChEkyZNRLdu3UTnzp2Ft7e37nE9fvy40XnGjx8vAIiXXnpJBAQECJVKJbp37y46dOggZs6cKYSo+XP9dvbu3SsCAgIEAOHt7S169OghmjVrpruO1157zeD4AQMGVPkc0/4t3I72eduyZUuT46tXr9a9flZ27Ngx0bRpUwFAuLu7i6ioKNG2bVvda+69994rNBqNwXm09+vMmTNFaGio8PDwEN27d9f97QMQb775pslYLH091/7d/Pzzzwanz5s3T3ddf//73w3GLly4IACIFi1aGJz+j3/8QwAQPj4+on379qJnz54iLCxMdzlz5841itfUa1BERITo1q2b7jnbrl07cf36daPzWvv19Xb4HlC/3gOEkJ9lqvq71t6eJUuWCIVCIcLCwkT37t11r4k+Pj7i5MmTZl2PVmxsrAAgXn/9dZ
Gfny+GDBmiu/5z585Veb7U1FTde4ynp6eIjo7WfRYCIJ5//vkqr2vBggXCx8dH+Pr6ih49eojGjRvrzrdy5UqD87z
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"# Scatter of electricity access against renewable share, with a fitted line\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"\n",
"# Styling (and legend labels) for the points and for the regression line\n",
"scatter_style = {'color': 'blue', 'label': 'Data points'}\n",
"line_style = {'color': 'red', 'linewidth': 2, 'label': 'Regression line'}\n",
"\n",
"sns.regplot(\n",
"    data=merged_data, x='electricity_access', y='Renewable',\n",
"    scatter_kws=scatter_style, line_kws=line_style, ax=ax\n",
")\n",
"\n",
"# Title and axis labels\n",
"ax.set_title('Access to Electricity vs. Share of Renewable Electricity in Kenya', fontsize=16)\n",
"ax.set_xlabel('Access to Electricity (% of Population)', fontsize=12)\n",
"ax.set_ylabel('Share of Renewable Electricity (%)', fontsize=12)\n",
"\n",
"# Legend built from the labels given to regplot above\n",
"ax.legend()\n",
"\n",
"# Write the figure to disk before displaying it\n",
"fig.savefig('regression.png', dpi=300, bbox_inches='tight')\n",
"\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "0aa2149a-7bbc-4454-890a-5bcb801fb4bb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\\begin{tabular}{rrrrr}\n",
"\\toprule\n",
"Year & GDP per capita & Population percentage with access to electricity & Share of electricity produced from renewable sources & GDP growth rate \\\\\n",
"\\midrule\n",
"2000 & 617.139000 & 15.175694 & 40.371230 & -4.745007 \\\\\n",
"2001 & 617.047000 & 17.048136 & 59.459465 & -0.014908 \\\\\n",
"2002 & 611.893000 & 18.912030 & 67.572815 & -0.835269 \\\\\n",
"2003 & 668.475000 & 16.000000 & 73.357664 & 9.247042 \\\\\n",
"2004 & 692.709000 & 22.642206 & 62.824677 & 3.625266 \\\\\n",
"2005 & 778.323000 & 24.522501 & 59.701494 & 12.359302 \\\\\n",
"2006 & 854.981000 & 26.422052 & 56.591209 & 9.849124 \\\\\n",
"2007 & 1028.226000 & 28.342442 & 67.016490 & 20.263023 \\\\\n",
"2008 & 1118.755000 & 30.280056 & 65.040647 & 8.804387 \\\\\n",
"2009 & 1123.268000 & 23.000000 & 53.435117 & 0.403395 \\\\\n",
"2010 & 1176.311000 & 19.200000 & 67.877096 & 4.722203 \\\\\n",
"2011 & 1178.599000 & 36.157864 & 63.218392 & 0.194506 \\\\\n",
"2012 & 1396.220000 & 38.125990 & 71.725830 & 18.464380 \\\\\n",
"2013 & 1490.422000 & 40.092150 & 72.524755 & 6.746931 \\\\\n",
"2014 & 1613.101000 & 36.000000 & 71.304350 & 8.231159 \\\\\n",
"2015 & 1625.176000 & 41.600000 & 85.015288 & 0.748558 \\\\\n",
"2016 & 1688.852000 & 53.100000 & 83.884296 & 3.918099 \\\\\n",
"2017 & 1805.398000 & 55.831993 & 74.422903 & 6.900901 \\\\\n",
"2018 & 1987.302000 & 61.180614 & 85.093162 & 10.075562 \\\\\n",
"2019 & 2107.735000 & 69.700000 & 86.919106 & 6.060126 \\\\\n",
"2020 & 2067.987000 & 71.492714 & 92.327584 & -1.885816 \\\\\n",
"2021 & 2208.691000 & 76.542450 & 90.057995 & 6.803911 \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\n"
]
}
],
"source": [
"# Column headings used in the LaTeX table\n",
"latex_headings = {\n",
"    'GDP_per_capita': 'GDP per capita',\n",
"    'electricity_access': 'Population percentage with access to electricity',\n",
"    'GDP_growth': 'GDP growth rate',\n",
"    'Renewable': 'Share of electricity produced from renewable sources',\n",
"}\n",
"\n",
"# Drop the Country column, relabel the rest, and print the table without the index\n",
"report_table = merged_data.drop(['Country'], axis=1).rename(columns=latex_headings)\n",
"print(report_table.to_latex(index=False))\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}