In [61]:
import pandas as pd


# Read the GDP-per-capita table. Its first column carries a two-line
# descriptive header; that column is renamed to 'Country', and the
# "Türkiye" spelling is mapped to "Turkey".
gdp_data = (
    pd.read_csv('imf_gdp_capita.csv')
    .rename(columns={'GDP per capita, current prices\n (U.S. dollars per capita)': 'Country'})
    .assign(Country=lambda frame: frame["Country"].replace({"Türkiye": "Turkey"}))
)

# Read the net-migration-rate table and give its columns the short names
# used throughout the analysis.
mig_data = pd.read_csv('net-migration-rate.csv')
mig_data = mig_data.rename(columns={'Entity': 'Country', 'Net migration rate': 'migration_rate'})

# Countries included in the analysis, assembled group by group.
selected_countries = (
    # High-income countries
    ["United States", "Canada", "Germany", "Sweden", "Australia", "Japan"]
    # Emerging economies
    + ["Turkey", "Mexico", "Brazil", "South Africa", "Malaysia"]
    # Middle-income countries
    + ["Poland", "Thailand", "Morocco", "Philippines"]
    # Oil-rich economies with high migration rates
    + ["United Arab Emirates", "Saudi Arabia", "Qatar"]
    # Selected EU countries with varied economies
    + ["France", "Italy", "Spain", "Netherlands"]
    # High-emigration or conflict-affected countries
    + ["Syria", "Iraq", "Israel", "Ukraine"]
)

# Keep only the rows that belong to the countries under study.
gdp_data = gdp_data[gdp_data['Country'].isin(selected_countries)]
mig_data = mig_data[mig_data['Country'].isin(selected_countries)]

# Check that the filter found every requested country in BOTH sources.
# A country missing from either table would vanish from the later inner
# merge without any error, so each table is checked separately.
gdp_countries = set(gdp_data['Country'])
mig_countries = set(mig_data['Country'])
missing_from_gdp = [country for country in selected_countries if country not in gdp_countries]
missing_from_mig = [country for country in selected_countries if country not in mig_countries]
missing_countries = [
    country for country in selected_countries
    if country not in gdp_countries or country not in mig_countries
]

if not missing_countries:
    print("All specified countries are present in the dataframe.")
else:
    print("The following countries are missing from the dataframe:", missing_countries)
    print("  missing from GDP data:", missing_from_gdp)
    print("  missing from migration data:", missing_from_mig)


# Reshape GDP from wide (one column per year) to long (one row per
# country-year). The migration table is used as loaded, so it only needs
# the year filter.
gdp_long = pd.melt(gdp_data, id_vars=['Country'], var_name='Year', value_name='GDP_per_capita')
gdp_long['Year'] = pd.to_numeric(gdp_long['Year'])

# Restrict both tables to 1980 onwards.
gdp_long = gdp_long[gdp_long['Year'] >= 1980]
mig_long = mig_data[mig_data['Year'] >= 1980]

# Merge on 'Country' and 'Year'; the inner join keeps only country-years
# present in both sources. Rows with any missing value are discarded.
combined_data = pd.merge(gdp_long, mig_long, on=['Country', 'Year'], how='inner').dropna()

# Drop rows whose GDP cell is the literal string 'no data', then convert the
# column to numbers. The explicit .copy() makes the filtered result an
# independent frame, so the column assignment below does not raise
# SettingWithCopyWarning.
combined_data = combined_data[combined_data['GDP_per_capita'] != 'no data'].copy()
combined_data['GDP_per_capita'] = pd.to_numeric(combined_data['GDP_per_capita'])


# Time-lagged correlation analysis: correlate GDP per capita in year t with
# the same country's net migration rate in year t - lag.
max_lag = 5  # Define the maximum lag in years
correlations = {}

# The lag is resolved by joining on (Country, Year) rather than by shifting
# rows positionally. A positional shift only equals a lag in years when the
# rows of each country are in chronological order with no missing years.
country_year = combined_data[['Country', 'Year']]
migration_by_year = combined_data[['Country', 'Year', 'migration_rate']]

for lag in range(0, max_lag + 1):
    lag_col = f'migration_rate_lag{lag}'

    # Re-label each migration observation with the year it should be paired
    # with (its own year + lag), then look it up for every row.
    shifted = (
        migration_by_year
        .assign(Year=migration_by_year['Year'] + lag)
        .rename(columns={'migration_rate': lag_col})
    )
    # A left join preserves the row order of country_year, so the values can
    # be written back positionally. validate= raises if (Country, Year) is
    # duplicated, instead of silently multiplying rows.
    aligned = country_year.merge(shifted, on=['Country', 'Year'], how='left', validate='many_to_one')
    combined_data[lag_col] = aligned[lag_col].to_numpy()

    # Calculate correlation after dropping rows with no lagged observation
    temp_df = combined_data.dropna(subset=[lag_col, 'GDP_per_capita'])
    correlation = temp_df['GDP_per_capita'].corr(temp_df[lag_col])
    correlations[f'Lag {lag} years'] = correlation

# Display the time-lagged correlations
print("Time-Lagged Correlations between Migration Rate and GDP per Capita:")
for label, corr in correlations.items():
    print(f"{label}: {corr}")


All specified countries are present in the dataframe.
Time-Lagged Correlations between Migration Rate and GDP per Capita:
Lag 0 years: 0.34214797965638405
Lag 1 years: 0.3388847633697717
Lag 2 years: 0.3319923519804265
Lag 3 years: 0.3395785030448451
Lag 4 years: 0.36593211845461004
Lag 5 years: 0.396634339747823


In [93]:
import pandas as pd

# Read the GDP-per-capita table, rename its long first-column header to
# 'Country', and map the "Türkiye" spelling to "Turkey".
gdp_data = pd.read_csv('imf_gdp_capita.csv')
gdp_data = gdp_data.rename(
    columns={'GDP per capita, current prices\n (U.S. dollars per capita)': 'Country'}
)
gdp_data["Country"] = gdp_data["Country"].replace({"Türkiye": "Turkey"})

# Read the net-migration-rate table under the short column names used below.
migration_column_names = {'Entity': 'Country', 'Net migration rate': 'migration_rate'}
mig_data = pd.read_csv('net-migration-rate.csv').rename(columns=migration_column_names)

# The two country groups compared in the split analysis.
high_income_countries = (
    ["United States", "Canada", "Germany", "Sweden", "Australia", "Japan"]
    + ["France", "Italy", "Spain", "Netherlands"]
)

# NOTE(review): despite the name, this group also holds the Gulf states and
# Israel - confirm that this is the intended classification.
low_income_emerging_countries = (
    ["Turkey", "Mexico", "Brazil", "South Africa", "Malaysia"]
    + ["Poland", "Thailand", "Morocco", "Philippines"]
    + ["United Arab Emirates", "Saudi Arabia", "Qatar"]
    + ["Syria", "Iraq", "Israel", "Ukraine"]
)

# Restrict both tables to the countries that appear in either group.
countries_in_scope = high_income_countries + low_income_emerging_countries
gdp_data = gdp_data.loc[gdp_data['Country'].isin(countries_in_scope)]
mig_data = mig_data.loc[mig_data['Country'].isin(countries_in_scope)]

# GDP goes from wide (one column per year) to long (one row per
# country-year); both tables are then limited to 1980 onwards.
gdp_long = gdp_data.melt(id_vars=['Country'], var_name='Year', value_name='GDP_per_capita')
gdp_long['Year'] = pd.to_numeric(gdp_long['Year'])
gdp_long = gdp_long.loc[gdp_long['Year'] >= 1980]
mig_long = mig_data.loc[mig_data['Year'] >= 1980]

# Inner-join on (Country, Year) and drop incomplete rows. GDP cells that are
# not numeric become NaN via errors='coerce' and are removed by the second
# dropna.
merged = gdp_long.merge(mig_long, on=['Country', 'Year'], how='inner').dropna()
combined_data = merged.assign(
    GDP_per_capita=pd.to_numeric(merged['GDP_per_capita'], errors='coerce')
).dropna()

# Define a function to perform time-lagged correlation for a given subset
def lagged_correlation_analysis(data, max_lag=8):
    """Correlate GDP per capita with the net migration rate of earlier years.

    For every lag from 1 to ``max_lag``, each (Country, Year) GDP observation
    is paired with the same country's migration rate from ``Year - lag``, and
    the correlation is computed over all such pairs pooled across countries.

    Pairing is done by joining on (Country, Year), not by shifting rows
    positionally. The result therefore does not depend on the row order of
    ``data``, and a missing year simply yields no pair instead of silently
    pairing observations that are further apart than ``lag`` years.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'Country', 'Year', 'GDP_per_capita' and
        'migration_rate', with at most one row per (Country, Year).
        The frame is not modified.
    max_lag : int, default 8
        Largest lag, in years, to evaluate.

    Returns
    -------
    dict
        Maps 'Lag {k} years' to the correlation for lag ``k`` (NaN when it
        cannot be computed, e.g. when no pairs exist).

    Raises
    ------
    pandas.errors.MergeError
        If ``data`` contains duplicate (Country, Year) rows.
    """
    observed = data[['Country', 'Year', 'GDP_per_capita']]
    migration = data[['Country', 'Year', 'migration_rate']]

    correlations = {}
    for lag in range(1, max_lag + 1):
        lag_col = f'migration_rate_lag{lag}'

        # Re-label each migration observation with the year whose GDP it
        # should be compared against (its own year + lag).
        shifted = (
            migration
            .assign(Year=migration['Year'] + lag)
            .rename(columns={'migration_rate': lag_col})
        )

        # Keep only country-years that have both a GDP value and a lagged
        # migration value.
        paired = observed.merge(shifted, on=['Country', 'Year'], how='inner', validate='one_to_one')
        paired = paired.dropna(subset=[lag_col, 'GDP_per_capita'])

        correlations[f'Lag {lag} years'] = paired['GDP_per_capita'].corr(paired[lag_col])
    return correlations

# Partition the merged panel into the two country groups. Each subset is an
# independent copy of the selected rows.
is_high_income = combined_data['Country'].isin(high_income_countries)
is_low_income_or_emerging = combined_data['Country'].isin(low_income_emerging_countries)
high_income_data = combined_data.loc[is_high_income].copy()
low_income_data = combined_data.loc[is_low_income_or_emerging].copy()

# Report the lagged correlations for the high-income group ...
print("High-Income Countries - Time-Lagged Correlations:")
high_income_correlations = lagged_correlation_analysis(high_income_data)
for label, value in high_income_correlations.items():
    print(f"{label}: {value}")

print("\n")

# ... and for the low-income/emerging group.
print("Low-Income/Emerging Countries - Time-Lagged Correlations:")
low_income_correlations = lagged_correlation_analysis(low_income_data)
for label, value in low_income_correlations.items():
    print(f"{label}: {value}")


High-Income Countries - Time-Lagged Correlations:
Lag 1 years: 0.3121019297546415
Lag 2 years: 0.31833806913767154
Lag 3 years: 0.30895190706328973
Lag 4 years: 0.31766166313177696
Lag 5 years: 0.33063243774520495
Lag 6 years: 0.30794658847506867
Lag 7 years: 0.2710641464391848
Lag 8 years: 0.22817143887640484


Low-Income/Emerging Countries - Time-Lagged Correlations:
Lag 1 years: 0.5842059724713272
Lag 2 years: 0.5776323197176021
Lag 3 years: 0.6036947175204658
Lag 4 years: 0.6451189295107449
Lag 5 years: 0.6952644943691771
Lag 6 years: 0.7115746646090384
Lag 7 years: 0.7145088255825862
Lag 8 years: 0.6960191461380354


In [141]:
# Load remittance inflows and attach them to the low-income/emerging panel.
remittances_data = pd.read_csv("personal-remittances-oda.csv").rename(columns={'Entity':'Country', "Personal remittances, received (current US$)":"remittances"})

# Left join: every country-year of the panel is kept, with NaN remittances
# where the remittance table has no matching row.
combined_remmitance_data = pd.merge(low_income_data, remittances_data, on=['Country', 'Year'], how='left')

# Drop the duplicated country-code columns and the aid column, which are not
# used in this analysis.
combined_remmitance_data = combined_remmitance_data.drop(["Code_x", "Code_y", "Net official development assistance and official aid received (current US$)"], axis=1)

# GDP growth in percentage, year over year within each country.
# pct_change compares each row with the previous ROW of its group, so it is
# computed on a chronologically sorted view. Values whose previous row is not
# the immediately preceding year are blanked, because they would span more
# than one year. Assigning the result back aligns on the index, so the
# frame's original row order is unchanged.
chronological = combined_remmitance_data.sort_values(['Country', 'Year'])
by_country = chronological.groupby('Country')
yearly_change = by_country['GDP_per_capita'].pct_change() * 100
follows_previous_year = by_country['Year'].diff() == 1
combined_remmitance_data['GDP_growth'] = yearly_change.where(follows_previous_year)

# Keep only complete observations for the variables analysed next.
combined_remmitance_data = combined_remmitance_data.dropna(subset=['GDP_per_capita','GDP_growth', 'migration_rate', 'remittances'])

print(combined_remmitance_data)


          Country  Year  GDP_per_capita  migration_rate   remittances  \
14         Brazil  1981        1382.548          -0.532  1.240000e+08   
15         Israel  1981        6729.335           4.214  5.318000e+08   
17         Mexico  1981        4461.156          -5.252  1.220000e+09   
18        Morocco  1981         969.430          -1.128  1.013863e+09   
19    Philippines  1981         829.816          -2.727  8.000000e+08   
..            ...   ...             ...             ...           ...   
635  Saudi Arabia  2022       34454.196          30.367  2.870193e+08   
636  South Africa  2022        6629.419           3.740  8.728556e+08   
637      Thailand  2022        7072.418           0.302  8.912474e+09   
638        Turkey  2022       10621.485          -3.464  6.940000e+08   
639       Ukraine  2022        4661.561        -138.846  1.671500e+10   

     GDP_growth  
14    12.412969  
15     5.203080  
17    25.408308  
18   -20.873515  
19     7.144370  
..          ...

In [142]:
from statsmodels.formula.api import ols
import statsmodels.api as sm

# Dependent variable: GDP growth (percent).
y = combined_remmitance_data['GDP_growth']

# Single regressor: the net migration rate, plus an intercept column.
X = sm.add_constant(combined_remmitance_data['migration_rate'])

# Fit one pooled ordinary-least-squares model over all country-years.
# No country-specific terms are included in the design matrix.
model = sm.OLS(y, X).fit()

# Print the full regression summary table.
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:             GDP_growth   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.002
Method:                 Least Squares   F-statistic:                    0.1046
Date:                Wed, 13 Nov 2024   Prob (F-statistic):              0.747
Time:                        21:43:59   Log-Likelihood:                -2070.1
No. Observations:                 512   AIC:                             4144.
Df Residuals:                     510   BIC:                             4153.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
const              5.3871      0.618      8.