341 lines
15 KiB
Text
341 lines
15 KiB
Text
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 61,
|
||
|
"id": "61e50c93-ad8f-4fd6-a4ad-ce1a9667f29b",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"All specified countries are present in the dataframe.\n",
|
||
|
"Time-Lagged Correlations between Migration Rate and GDP per Capita:\n",
|
||
|
"Lag 0 years: 0.34214797965638405\n",
|
||
|
"Lag 1 years: 0.3388847633697717\n",
|
||
|
"Lag 2 years: 0.3319923519804265\n",
|
||
|
"Lag 3 years: 0.3395785030448451\n",
|
||
|
"Lag 4 years: 0.36593211845461004\n",
|
||
|
"Lag 5 years: 0.396634339747823\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"import pandas as pd\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"# Load the GDP and GHG data CSVs\n",
|
||
|
"gdp_data = pd.read_csv('imf_gdp_capita.csv').rename(columns={'GDP per capita, current prices\\n (U.S. dollars per capita)':'Country'})\n",
|
||
|
"gdp_data[\"Country\"] = gdp_data[\"Country\"].replace({\"Türkiye\": \"Turkey\"})\n",
|
||
|
"\n",
|
||
|
"mig_data = pd.read_csv('net-migration-rate.csv').rename(columns={'Entity':'Country', 'Net migration rate':'migration_rate'})\n",
|
||
|
"\n",
|
||
|
"selected_countries = [\n",
|
||
|
" # High-Income Countries\n",
|
||
|
" \"United States\", \"Canada\", \"Germany\", \"Sweden\", \"Australia\", \"Japan\",\n",
|
||
|
" \n",
|
||
|
" # Emerging Economies\n",
|
||
|
" \"Turkey\", \"Mexico\", \"Brazil\", \"South Africa\", \"Malaysia\",\n",
|
||
|
" \n",
|
||
|
" # Middle-Income Countries\n",
|
||
|
" \"Poland\", \"Thailand\", \"Morocco\", \"Philippines\",\n",
|
||
|
" \n",
|
||
|
" # Oil-Rich Economies with High Migration Rates\n",
|
||
|
" \"United Arab Emirates\", \"Saudi Arabia\", \"Qatar\",\n",
|
||
|
" \n",
|
||
|
" # Selected EU Countries with Varied Economies\n",
|
||
|
" \"France\", \"Italy\", \"Spain\", \"Netherlands\",\n",
|
||
|
"\n",
|
||
|
" # High-Emigration or Conflict-Affected Countries\n",
|
||
|
" \"Syria\", \"Iraq\", \"Israel\", \"Ukraine\"\n",
|
||
|
"]\n",
|
||
|
"\n",
|
||
|
"gdp_data=gdp_data[gdp_data['Country'].isin(selected_countries)]\n",
|
||
|
"mig_data=mig_data[mig_data['Country'].isin(selected_countries)]\n",
|
||
|
"\n",
|
||
|
"missing_countries = [country for country in selected_countries if country not in gdp_data['Country'].unique()]\n",
|
||
|
"\n",
|
||
|
"# Check if all data has been filtered correctly\n",
|
||
|
"if not missing_countries:\n",
|
||
|
" print(\"All specified countries are present in the dataframe.\")\n",
|
||
|
"else:\n",
|
||
|
" print(\"The following countries are missing from the dataframe:\", missing_countries)\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"# Reshape data to long format\n",
|
||
|
"gdp_long = pd.melt(gdp_data, id_vars=['Country'], var_name='Year', value_name='GDP_per_capita')\n",
|
||
|
"gdp_long['Year'] = pd.to_numeric(gdp_long['Year'])\n",
|
||
|
"mig_long = mig_data\n",
|
||
|
"\n",
|
||
|
"gdp_long = gdp_long[gdp_long['Year'] >= 1980]\n",
|
||
|
"mig_long = mig_long[mig_long['Year'] >= 1980]\n",
|
||
|
"\n",
|
||
|
"# Merge on 'Country' and 'Year'\n",
|
||
|
"combined_data = pd.merge(gdp_long, mig_long, on=['Country', 'Year'], how='inner')\n",
|
||
|
"\n",
|
||
|
"combined_data.dropna(inplace=True)\n",
|
||
|
"combined_data = combined_data[combined_data['GDP_per_capita'] != 'no data']\n",
|
||
|
"combined_data['GDP_per_capita'] = pd.to_numeric(combined_data['GDP_per_capita'])\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"# Time-lagged correlation analysis\n",
|
||
|
"max_lag = 5 # Define the maximum lag in years\n",
|
||
|
"correlations = {}\n",
|
||
|
"\n",
|
||
|
"for lag in range(0, max_lag + 1):\n",
|
||
|
" # Shift 'migration_rate' by the lag period\n",
|
||
|
" combined_data[f'migration_rate_lag{lag}'] = combined_data.groupby('Country')['migration_rate'].shift(lag)\n",
|
||
|
" \n",
|
||
|
" # Calculate correlation after dropping NaN values from the lagged column\n",
|
||
|
" temp_df = combined_data.dropna(subset=[f'migration_rate_lag{lag}', 'GDP_per_capita'])\n",
|
||
|
" correlation = temp_df['GDP_per_capita'].corr(temp_df[f'migration_rate_lag{lag}'])\n",
|
||
|
" correlations[f'Lag {lag} years'] = correlation\n",
|
||
|
"\n",
|
||
|
"# Display the time-lagged correlations\n",
|
||
|
"print(\"Time-Lagged Correlations between Migration Rate and GDP per Capita:\")\n",
|
||
|
"for lag, corr in correlations.items():\n",
|
||
|
" print(f\"{lag}: {corr}\")\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 93,
|
||
|
"id": "d46e416d-6c33-4804-be9d-9df31637c0f8",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"High-Income Countries - Time-Lagged Correlations:\n",
|
||
|
"Lag 1 years: 0.3121019297546415\n",
|
||
|
"Lag 2 years: 0.31833806913767154\n",
|
||
|
"Lag 3 years: 0.30895190706328973\n",
|
||
|
"Lag 4 years: 0.31766166313177696\n",
|
||
|
"Lag 5 years: 0.33063243774520495\n",
|
||
|
"Lag 6 years: 0.30794658847506867\n",
|
||
|
"Lag 7 years: 0.2710641464391848\n",
|
||
|
"Lag 8 years: 0.22817143887640484\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"Low-Income/Emerging Countries - Time-Lagged Correlations:\n",
|
||
|
"Lag 1 years: 0.5842059724713272\n",
|
||
|
"Lag 2 years: 0.5776323197176021\n",
|
||
|
"Lag 3 years: 0.6036947175204658\n",
|
||
|
"Lag 4 years: 0.6451189295107449\n",
|
||
|
"Lag 5 years: 0.6952644943691771\n",
|
||
|
"Lag 6 years: 0.7115746646090384\n",
|
||
|
"Lag 7 years: 0.7145088255825862\n",
|
||
|
"Lag 8 years: 0.6960191461380354\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"import pandas as pd\n",
|
||
|
"\n",
|
||
|
"# Load the GDP and migration data CSVs\n",
|
||
|
"gdp_data = pd.read_csv('imf_gdp_capita.csv').rename(columns={'GDP per capita, current prices\\n (U.S. dollars per capita)':'Country'})\n",
|
||
|
"gdp_data[\"Country\"] = gdp_data[\"Country\"].replace({\"Türkiye\": \"Turkey\"})\n",
|
||
|
"\n",
|
||
|
"mig_data = pd.read_csv('net-migration-rate.csv').rename(columns={'Entity':'Country', 'Net migration rate':'migration_rate'})\n",
|
||
|
"\n",
|
||
|
"# Define lists for high-income and low-income/emerging countries\n",
|
||
|
"high_income_countries = [\n",
|
||
|
" \"United States\", \"Canada\", \"Germany\", \"Sweden\", \"Australia\", \"Japan\",\n",
|
||
|
" \"France\", \"Italy\", \"Spain\", \"Netherlands\"\n",
|
||
|
"]\n",
|
||
|
"\n",
|
||
|
"low_income_emerging_countries = [\n",
|
||
|
" \"Turkey\", \"Mexico\", \"Brazil\", \"South Africa\", \"Malaysia\",\n",
|
||
|
" \"Poland\", \"Thailand\", \"Morocco\", \"Philippines\",\n",
|
||
|
" \"United Arab Emirates\", \"Saudi Arabia\", \"Qatar\",\n",
|
||
|
" \"Syria\", \"Iraq\", \"Israel\", \"Ukraine\",\n",
|
||
|
"]\n",
|
||
|
"\n",
|
||
|
"# Filter data to selected countries\n",
|
||
|
"gdp_data = gdp_data[gdp_data['Country'].isin(high_income_countries + low_income_emerging_countries)]\n",
|
||
|
"mig_data = mig_data[mig_data['Country'].isin(high_income_countries + low_income_emerging_countries)]\n",
|
||
|
"\n",
|
||
|
"# Reshape GDP data to long format\n",
|
||
|
"gdp_long = pd.melt(gdp_data, id_vars=['Country'], var_name='Year', value_name='GDP_per_capita')\n",
|
||
|
"gdp_long['Year'] = pd.to_numeric(gdp_long['Year'])\n",
|
||
|
"gdp_long = gdp_long[gdp_long['Year'] >= 1980]\n",
|
||
|
"mig_long = mig_data[mig_data['Year'] >= 1980]\n",
|
||
|
"\n",
|
||
|
"# Merge GDP and migration data on 'Country' and 'Year'\n",
|
||
|
"combined_data = pd.merge(gdp_long, mig_long, on=['Country', 'Year'], how='inner')\n",
|
||
|
"combined_data.dropna(inplace=True)\n",
|
||
|
"combined_data['GDP_per_capita'] = pd.to_numeric(combined_data['GDP_per_capita'], errors='coerce')\n",
|
||
|
"combined_data.dropna(inplace=True)\n",
|
||
|
"\n",
|
||
|
"# Define a function to perform time-lagged correlation for a given subset\n",
|
||
|
"def lagged_correlation_analysis(data, max_lag=8):\n",
|
||
|
" correlations = {}\n",
|
||
|
" for lag in range(1, max_lag + 1):\n",
|
||
|
" # Ensure we work on a copy of the data to avoid warnings\n",
|
||
|
" data_copy = data.copy()\n",
|
||
|
" \n",
|
||
|
" # Shift 'migration_rate' by the lag period\n",
|
||
|
" data_copy.loc[:, f'migration_rate_lag{lag}'] = data_copy.groupby('Country')['migration_rate'].shift(lag)\n",
|
||
|
" \n",
|
||
|
" # Calculate correlation after dropping NaN values from the lagged column\n",
|
||
|
" temp_df = data_copy.dropna(subset=[f'migration_rate_lag{lag}', 'GDP_per_capita'])\n",
|
||
|
" correlation = temp_df['GDP_per_capita'].corr(temp_df[f'migration_rate_lag{lag}'])\n",
|
||
|
" correlations[f'Lag {lag} years'] = correlation\n",
|
||
|
" return correlations\n",
|
||
|
"\n",
|
||
|
"# Split the data into high-income and low-income/emerging subsets\n",
|
||
|
"high_income_data = combined_data[combined_data['Country'].isin(high_income_countries)].copy() # Ensure it's a copy\n",
|
||
|
"low_income_data = combined_data[combined_data['Country'].isin(low_income_emerging_countries)].copy() # Ensure it's a copy\n",
|
||
|
"\n",
|
||
|
"# Perform lagged correlation analysis for each subset\n",
|
||
|
"print(\"High-Income Countries - Time-Lagged Correlations:\")\n",
|
||
|
"high_income_correlations = lagged_correlation_analysis(high_income_data)\n",
|
||
|
"for lag, corr in high_income_correlations.items():\n",
|
||
|
" print(f\"{lag}: {corr}\")\n",
|
||
|
"\n",
|
||
|
"print(\"\\n\")\n",
|
||
|
"\n",
|
||
|
"print(\"Low-Income/Emerging Countries - Time-Lagged Correlations:\")\n",
|
||
|
"low_income_correlations = lagged_correlation_analysis(low_income_data)\n",
|
||
|
"for lag, corr in low_income_correlations.items():\n",
|
||
|
" print(f\"{lag}: {corr}\")\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 141,
|
||
|
"id": "0f595d37-dd79-460a-9f38-9d43cc548dc5",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
" Country Year GDP_per_capita migration_rate remittances \\\n",
|
||
|
"14 Brazil 1981 1382.548 -0.532 1.240000e+08 \n",
|
||
|
"15 Israel 1981 6729.335 4.214 5.318000e+08 \n",
|
||
|
"17 Mexico 1981 4461.156 -5.252 1.220000e+09 \n",
|
||
|
"18 Morocco 1981 969.430 -1.128 1.013863e+09 \n",
|
||
|
"19 Philippines 1981 829.816 -2.727 8.000000e+08 \n",
|
||
|
".. ... ... ... ... ... \n",
|
||
|
"635 Saudi Arabia 2022 34454.196 30.367 2.870193e+08 \n",
|
||
|
"636 South Africa 2022 6629.419 3.740 8.728556e+08 \n",
|
||
|
"637 Thailand 2022 7072.418 0.302 8.912474e+09 \n",
|
||
|
"638 Turkey 2022 10621.485 -3.464 6.940000e+08 \n",
|
||
|
"639 Ukraine 2022 4661.561 -138.846 1.671500e+10 \n",
|
||
|
"\n",
|
||
|
" GDP_growth \n",
|
||
|
"14 12.412969 \n",
|
||
|
"15 5.203080 \n",
|
||
|
"17 25.408308 \n",
|
||
|
"18 -20.873515 \n",
|
||
|
"19 7.144370 \n",
|
||
|
".. ... \n",
|
||
|
"635 21.334237 \n",
|
||
|
"636 -4.457650 \n",
|
||
|
"637 -2.280980 \n",
|
||
|
"638 11.330031 \n",
|
||
|
"639 -4.380080 \n",
|
||
|
"\n",
|
||
|
"[512 rows x 6 columns]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"remittances_data = pd.read_csv(\"personal-remittances-oda.csv\").rename(columns={'Entity':'Country', \"Personal remittances, received (current US$)\":\"remittances\"})\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"combined_remmitance_data = pd.merge(low_income_data, remittances_data, on=['Country', 'Year'], how='left').copy()\n",
|
||
|
"\n",
|
||
|
"combined_remmitance_data= combined_remmitance_data.drop([\"Code_x\", \"Code_y\", \"Net official development assistance and official aid received (current US$)\"],axis=1)\n",
|
||
|
"combined_remmitance_data['GDP_growth'] = combined_remmitance_data.groupby('Country')['GDP_per_capita'].pct_change() * 100 # GDP growth in percentage\n",
|
||
|
"\n",
|
||
|
"combined_remmitance_data.dropna(subset=['GDP_per_capita','GDP_growth', 'migration_rate', 'remittances'], inplace=True)\n",
|
||
|
"\n",
|
||
|
"print(combined_remmitance_data)\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 142,
|
||
|
"id": "b530f3f3-1a2e-4ba0-b4a6-b1252bea91ad",
|
||
|
"metadata": {
|
||
|
"scrolled": true
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
" OLS Regression Results \n",
|
||
|
"==============================================================================\n",
|
||
|
"Dep. Variable: GDP_growth R-squared: 0.000\n",
|
||
|
"Model: OLS Adj. R-squared: -0.002\n",
|
||
|
"Method: Least Squares F-statistic: 0.1046\n",
|
||
|
"Date: Wed, 13 Nov 2024 Prob (F-statistic): 0.747\n",
|
||
|
"Time: 21:43:59 Log-Likelihood: -2070.1\n",
|
||
|
"No. Observations: 512 AIC: 4144.\n",
|
||
|
"Df Residuals: 510 BIC: 4153.\n",
|
||
|
"Df Model: 1 \n",
|
||
|
"Covariance Type: nonrobust \n",
|
||
|
"==================================================================================\n",
|
||
|
" coef std err t P>|t| [0.025 0.975]\n",
|
||
|
"----------------------------------------------------------------------------------\n",
|
||
|
"const 5.3871 0.618 8.715 0.000 4.173 6.602\n",
|
||
|
"migration_rate 0.0106 0.033 0.323 0.747 -0.054 0.075\n",
|
||
|
"==============================================================================\n",
|
||
|
"Omnibus: 23.597 Durbin-Watson: 1.421\n",
|
||
|
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 42.474\n",
|
||
|
"Skew: -0.302 Prob(JB): 5.98e-10\n",
|
||
|
"Kurtosis: 4.276 Cond. No. 19.1\n",
|
||
|
"==============================================================================\n",
|
||
|
"\n",
|
||
|
"Notes:\n",
|
||
|
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"from statsmodels.formula.api import ols\n",
|
||
|
"import statsmodels.api as sm\n",
|
||
|
"\n",
|
||
|
"# Prepare the independent variable\n",
|
||
|
"X = combined_remmitance_data['migration_rate'] \n",
|
||
|
"X = sm.add_constant(X) # Adds a constant term for the regression\n",
|
||
|
"\n",
|
||
|
"# Dependent variable: GDP growth\n",
|
||
|
"y = combined_remmitance_data['GDP_growth']\n",
|
||
|
"\n",
|
||
|
"# Perform fixed effects regression\n",
|
||
|
"model = sm.OLS(y, X).fit()\n",
|
||
|
"\n",
|
||
|
"# Summary of the regression model\n",
|
||
|
"print(model.summary())"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3 (ipykernel)",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.12.6"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 5
|
||
|
}
|