Equity Lifecycle

Markov chain model for Open system

Author

GitSAM

Published

April 7, 2025

Keywords

survivorship bias, truncation, conservative system

1 Data Pre-Processing

Code
import numpy as np
import pandas as pd

import sqlite3
con = sqlite3.connect(database="../../tbtf.sqlite")

crsp = pd.read_sql_query(
  sql="SELECT * FROM crsp",
  con=con,
  parse_dates={"date"}
)

import sys
import os
# 현재 경로 기준으로 상위 디렉토리로 경로 추가
sys.path.append(os.path.abspath('../..'))

import tbtf_data

ff3 = tbtf_data.load_ff3_factors(start_date=crsp['date'].min(), end_date=crsp['date'].max())
crsp_trunc = tbtf_data.crsp_truncation(crsp,ff3)
crsp_full = tbtf_data.crsp_lifecycle(crsp_trunc)

print(crsp_full.head())
   permno       date     mktcap       ret primaryexch  siccd   industry  \
0   10001 1996-01-31  20.814125 -0.026667           Q   4925  Utilities   
1   10001 1996-02-29  21.099250  0.013699           Q   4925  Utilities   
2   10001 1996-03-31  21.899480  0.036423           Q   4925  Utilities   
3   10001 1996-04-30  20.348063 -0.070840           Q   4925  Utilities   
4   10001 1996-05-31  19.915125 -0.021277           Q   4925  Utilities   

   mktcap_lag  ret_excess  state  state_lag   lifetype  life  
0   21.384375   -0.030967      3          0  old-death   260  
1   20.814125    0.009799      3          3  old-death   260  
2   21.099250    0.032523      3          3  old-death   260  
3   21.899480   -0.075440      2          3  old-death   260  
4   20.348063   -0.025477      2          2  old-death   260  

2 Life Type of Firms

Code
# crsp_full[crsp_full['permno']==90848] 
# permno, 제대로된 lifetype, 현재 lifetype, 현재 life, 입력 crsp_trunc 레코드가 2개 존재함
# 30330, old-death, lifetype=old-death,life=2, first record date = sample_start
# 14252, shell, lifetype= normal, life=2, sample_start < first record date < sample_end
# 13010, shell, lifetype= normal, life=2, sample_start < first record date < sample_end
# 90848, shell, lifetype= normal, life=2, sample_start < first record date < sample_end
# Refco Inc, 90848

# permno, 제대로된 lifetype, 현재 lifetype, 현재 life, 이 경우는 입력 crsp_trunc 레코드가 1개만 존재함
# 15139, young-living, lifetype= shell, life=1, first record date = sample_end
# 16815
# crsp[crsp['permno']==14475] # Super Micro Computer (SMCI)라는 회사만 2019 상장폐지 후 2020년 재상장시 Permno (14475)가 유지되었다

result = crsp_full.groupby('lifetype')['permno'].nunique()
print(result)

import matplotlib.pyplot as plt
import seaborn as sns

# Group by permno to get unique records for plotting (1 row per permno)
permno_summary = (
    crsp_full.sort_values("date")
    .groupby("permno", as_index=False)
    .first()
    [["permno", "industry", "lifetype", "life", "state"]]
)
lifetype
normal          5860
old-death       5882
old-living      1021
shell             10
young-living    3000
Name: permno, dtype: int64

3 Life Distribution for Each Lifetype

Code
# FacetGrid 생성
g = sns.FacetGrid(permno_summary, col="industry", col_wrap=4, height=4, sharex=False, sharey=False)

# industry 이름 리스트
industry_names = g.col_names

# 각 subplot에 대해 데이터 분할 및 그리기
for ax, industry in zip(g.axes.flat, industry_names):
    data = permno_summary[permno_summary['industry'] == industry]
    for lifetype, lifetype_data in data.groupby('lifetype'):
        ax.hist(lifetype_data['life'], bins=20, alpha=0.7, label=lifetype)
    ax.set_title(industry)
    ax.legend()

plt.tight_layout()
plt.show()

4 Firm counts per Industry for Each Lifetype

Code
# 주요 lifetype만 사용
main_lifetypes = ['old-living', 'old-death', 'young-living', 'normal']
filtered_data = permno_summary[permno_summary['lifetype'].isin(main_lifetypes)]

# industry 순서: old-death 기준
industry_order = (
    filtered_data[filtered_data['lifetype'] == 'old-death']['industry']
    .value_counts()
    .index.tolist()
)

# 전체 industry 목록 확보
all_industries = filtered_data['industry'].unique().tolist()

# industry에 대한 색상 팔레트 생성
palette_colors = sns.color_palette("tab20", len(all_industries))
palette = dict(zip(all_industries, palette_colors))

# y축 최대값: old-death 기준
y_max = (
    filtered_data[filtered_data['lifetype'] == 'old-death']['industry']
    .value_counts()
    .max()
)

# 서브플롯 설정
fig, axes = plt.subplots(2, 2, figsize=(16, 10))
axes = axes.flatten()

for i, lifetype in enumerate(main_lifetypes):
    ax = axes[i]
    subset = filtered_data[filtered_data['lifetype'] == lifetype]
    sns.countplot(
        data=subset,
        x='industry',
        hue='industry',
        order=industry_order,
        palette=palette,
        ax=ax
    )
    ax.set_title(f"Firm counts by Industry for Lifetype={lifetype}")
    ax.set_ylabel("Count")
    ax.set_ylim(0, y_max + 5)
    ax.tick_params(axis='x', rotation=45)
    ax.set_xlabel("Industry")

# 여분 subplot 제거
for j in range(len(main_lifetypes), len(axes)):
    fig.delaxes(axes[j])

plt.tight_layout()
plt.show()

5 Newly listed Firms per Initial Decile by Industry

Code
# in-sample 신규 상장된 permno
in_sample_types = ['normal', 'young-living', 'shell']
in_sample_df = permno_summary[permno_summary['lifetype'].isin(in_sample_types)]

# 산업 순서 정의
industry_order = ['Manufacturing', 'Finance', 'Services', 'Retail', 'Transportation',
                  'Mining', 'Wholesale', 'Utilities', 'Construction', 'Agriculture',
                  'Public', 'Missing']

# subplot 설정
n_cols = 4
n_rows = (len(industry_order) + n_cols - 1) // n_cols
fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 10), sharey=True)
axes = axes.flatten()

for i, industry in enumerate(industry_order):
    ax = axes[i]
    subset = in_sample_df[in_sample_df['industry'] == industry]
    if subset.empty:
        ax.set_title(industry)
        ax.axis('off')
        continue
    sns.countplot(data=subset, x='state', ax=ax, order=sorted(subset['state'].dropna().unique()), color='skyblue')
    ax.set_title(industry)
    ax.set_xlabel('Initial Decile')
    ax.set_ylabel('Permno Counts')
    ax.tick_params(axis='x', rotation=0)

# 남는 subplot 제거
for j in range(i + 1, len(axes)):
    fig.delaxes(axes[j])

fig.suptitle("Newly listed Firms per Initial Decile by Industry", fontsize=16)
fig.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

6 Life per of Initial Decile by Industry (Full cycle Firms)

Code
# 필터링: lifetype이 'normal'인 경우만
normal_df = permno_summary[permno_summary['lifetype'] == 'normal'].copy()

# 산업 순서 지정
industry_order = ['Manufacturing', 'Finance', 'Services', 'Retail', 'Transportation', 'Mining',
                  'Wholesale', 'Utilities', 'Construction', 'Agriculture', 'Public', 'Missing']

# state와 life의 관계를 박스플롯으로 각 industry별 subplot 생성
n_cols = 4
n_rows = 3
fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 10), sharey=True)

for ax, industry in zip(axes.flat, industry_order):
    data = normal_df[normal_df['industry'] == industry]
    if not data.empty and data['state'].nunique() > 0:
        sns.boxplot(data=data, x='state', y='life', ax=ax, order=sorted(data['state'].unique()))
        ax.set_title(industry)
    else:
        ax.set_visible(False)

    ax.set_xlabel('Initial Decile')
    ax.set_ylabel('Life (Months)')

# 빈 subplot 제거
for j in range(len(industry_order), len(axes.flat)):
    fig.delaxes(axes.flat[j])

fig.suptitle("Life per of Initial Decile by Industry (Lifetype=Normal)", fontsize=16)
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

7 Median Life per of Initial Decile by Industry (Full lifecycle Firms)

Code
# lifetype이 'normal'인 permno만 필터링
normal_df = permno_summary[permno_summary['lifetype'] == 'normal'].copy()

# subplot 순서 지정
industry_order = ['Manufacturing', 'Finance', 'Services', 'Retail', 'Transportation',
                  'Mining', 'Wholesale', 'Utilities', 'Construction',
                  'Agriculture', 'Public', 'Missing']

# 산업과 state별로 median life 계산
grouped = normal_df.groupby(['industry', 'state'])['life'].median().reset_index()

# plotting
n_cols = 4
n_rows = 3
fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 10), sharey=True)
axes = axes.flatten()

for i, industry in enumerate(industry_order):
    ax = axes[i]
    data = grouped[grouped['industry'] == industry]
    if not data.empty:
        sns.barplot(data=data, x='state', y='life', hue='state', ax=ax, palette='Blues_d', legend=False)
        ax.set_title(industry)
        ax.set_xlabel("Initial Decile")
        ax.set_ylabel("Median Life (Month)")
    else:
        ax.set_visible(False)

fig.suptitle("Median Life per Initial State by Industry (Lifetype=Normal)", fontsize=16)
plt.tight_layout()
plt.show()

8 The number of “continuously alive” firms per Initial State by Industry (lifetype = Old-Living)

Code
# Filter only 'old-living' lifetype
old_living_data = permno_summary[permno_summary['lifetype'] == 'old-living']

# Define industry order
industry_order = [
    "Manufacturing", "Finance", "Services", "Retail", "Transportation",
    "Mining", "Wholesale", "Utilities", "Construction",
    "Agriculture", "Public", "Missing"
]

# Set up the subplot grid
fig, axes = plt.subplots(3, 4, figsize=(16, 10), sharey=True)
axes = axes.flatten()

for i, industry in enumerate(industry_order):
    ax = axes[i]
    data = old_living_data[old_living_data["industry"] == industry]
    if not data.empty:
        state_counts = data["state"].value_counts().sort_index()
        bar_data = pd.DataFrame({"state": state_counts.index, "count": state_counts.values})
        sns.barplot(data=bar_data, x="state", y="count", ax=ax, color="skyblue", order=sorted(bar_data["state"].unique()))
    ax.set_title(industry)
    ax.set_xlabel("Initial Decile")
    ax.set_ylabel("Permno Count")
    ax.set_xticks(range(10))
    ax.set_xticklabels(range(1, 11))

# Remove any unused subplots
for j in range(i + 1, len(axes)):
    fig.delaxes(axes[j])

plt.suptitle("Permno Count per Initial State By Industry (lifetype = Old-Living)", fontsize=16)
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

9 The number of “1-month shell” firms per Initial State by Industry (lifetype = shell)

Code
# Filter only 'shell' lifetype
shell_data = permno_summary[permno_summary['lifetype'] == 'shell'].copy()

# Define industry order
industry_order = [
    "Manufacturing", "Finance", "Services", "Retail", "Transportation",
    "Mining", "Wholesale", "Utilities", "Construction",
    "Agriculture", "Public", "Missing"
]

# Set up the subplot grid
fig, axes = plt.subplots(3, 4, figsize=(16, 10), sharey=True)
axes = axes.flatten()

for i, industry in enumerate(industry_order):
    ax = axes[i]
    data = shell_data[shell_data["industry"] == industry]
    if not data.empty:
        # Count state values and convert to sorted DataFrame
        bar_data = (
            data.groupby("state")
            .size()
            .reindex(range(1, 11), fill_value=0)
            .reset_index(name="count")
        )
        sns.barplot(data=bar_data, x="state", y="count", ax=ax, color="skyblue")
        ax.set_xticks(range(0, 10))
        ax.set_xticklabels(range(1, 11))
    ax.set_title(industry)
    ax.set_xlabel("Initial Decile")
    ax.set_ylabel("Permno Count")

# Remove any unused subplots
for j in range(i + 1, len(axes)):
    fig.delaxes(axes[j])

plt.suptitle("Permno Count per Initial State by Industry (lifetype = Shell)", fontsize=16)
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()