이 포스트의 코드는 데이콘 주식 종료 가격 예측 경진대회에 참여하여 작성했던 코드 입니다. 대회 링크

좋은 성적을 거두지는 못했지만, 짧은 시간동안 많은 노력을 들여 재미있게 참여했던 대회여서 블로그를 통해 코드를 공유합니다.


import pandas as pd
import numpy as np
import os
import FinanceDataReader as fdr

from sklearn.linear_model import LinearRegression
from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')
path = os.getcwd()
list_name = 'stock_list.csv'
sample_name = 'sample_submission.csv'

stock_list = pd.read_csv(os.path.join(path, list_name))
stock_list['종목코드'] = stock_list['종목코드'].apply(lambda x : str(x).zfill(6))
stock_list
종목명 종목코드 상장시장
0 삼성전자 005930 KOSPI
1 SK하이닉스 000660 KOSPI
2 NAVER 035420 KOSPI
3 카카오 035720 KOSPI
4 삼성바이오로직스 207940 KOSPI
... ... ... ...
365 맘스터치 220630 KOSDAQ
366 다날 064260 KOSDAQ
367 제이시스메디칼 287410 KOSDAQ
368 크리스에프앤씨 110790 KOSDAQ
369 쎄트렉아이 099320 KOSDAQ

370 rows × 3 columns

start_date = '20210104'
end_date = '20211214'

Business_days = pd.DataFrame(pd.date_range(start_date, end_date, freq='B'), columns = ['Date'])
Business_days['weekday'] = Business_days.Date.apply(lambda x:x.weekday())
Business_days['weeknum'] = Business_days.Date.apply(lambda x:x.strftime('%V'))
Business_days
Date weekday weeknum
0 2021-01-04 0 01
1 2021-01-05 1 01
2 2021-01-06 2 01
3 2021-01-07 3 01
4 2021-01-08 4 01
... ... ... ...
242 2021-12-08 2 49
243 2021-12-09 3 49
244 2021-12-10 4 49
245 2021-12-13 0 50
246 2021-12-14 1 50

247 rows × 3 columns

  • KOSPI 정보 추가
KOSPI = fdr.DataReader('KS11', start_date, end_date).reset_index()
KOSPI.name = 'kospi'
def col_rename(data_set):
    for i in data_set.columns:
        if i =='Date':
            pass
        else:
            data_set.rename(columns={i:data_set.name+'_'+i}, inplace=True)
    return data_set        
col_rename(KOSPI)
Date kospi_Close kospi_Open kospi_High kospi_Low kospi_Volume kospi_Change
0 2021-01-04 2944.45 2874.50 2946.54 2869.11 1.030000e+09 0.0247
1 2021-01-05 2990.57 2943.67 2990.57 2921.84 1.520000e+09 0.0157
2 2021-01-06 2968.21 2993.34 3027.16 2961.37 1.790000e+09 -0.0075
3 2021-01-07 3031.68 2980.75 3055.28 2980.75 1.520000e+09 0.0214
4 2021-01-08 3152.18 3040.11 3161.11 3040.11 1.300000e+09 0.0397
... ... ... ... ... ... ... ...
231 2021-12-08 3001.80 3017.93 3036.13 2995.34 4.882500e+08 0.0034
232 2021-12-09 3029.57 3007.00 3029.57 3001.55 5.134400e+08 0.0093
233 2021-12-10 3010.23 3008.70 3017.64 2998.29 4.516000e+08 -0.0064
234 2021-12-13 3001.66 3019.67 3043.83 3000.51 3.758300e+08 -0.0028
235 2021-12-14 2987.95 2983.95 3001.70 2976.16 5.815700e+08 -0.0046

236 rows × 7 columns

data = pd.merge(Business_days, KOSPI, how = 'outer')
  • KOSDAQ 정보 추가
KOSDAQ = fdr.DataReader('KQ11', start_date, end_date).reset_index()
KOSDAQ.name = 'kosdaq'
col_rename(KOSDAQ)
Date kosdaq_Close kosdaq_Open kosdaq_High kosdaq_Low kosdaq_Volume kosdaq_Change
0 2021-01-04 977.62 968.86 977.62 960.52 1.700000e+09 0.0095
1 2021-01-05 985.76 976.43 985.76 965.53 1.810000e+09 0.0083
2 2021-01-06 981.39 987.25 990.88 977.37 1.980000e+09 -0.0044
3 2021-01-07 988.86 983.28 993.91 982.27 2.260000e+09 0.0076
4 2021-01-08 987.79 990.70 995.22 978.12 2.560000e+09 -0.0011
... ... ... ... ... ... ... ...
231 2021-12-08 1006.04 1006.61 1012.64 1003.92 1.070000e+09 0.0094
232 2021-12-09 1022.87 1009.29 1022.87 1009.29 1.060000e+09 0.0167
233 2021-12-10 1011.57 1016.34 1018.42 1010.02 1.150000e+09 -0.0110
234 2021-12-13 1005.96 1014.27 1014.90 1005.96 1.340000e+09 -0.0055
235 2021-12-14 1002.81 1001.11 1007.73 996.85 1.080000e+09 -0.0031

236 rows × 7 columns

data = pd.merge(data, KOSDAQ, how = 'outer')
data
Date weekday weeknum kospi_Close kospi_Open kospi_High kospi_Low kospi_Volume kospi_Change kosdaq_Close kosdaq_Open kosdaq_High kosdaq_Low kosdaq_Volume kosdaq_Change
0 2021-01-04 0 01 2944.45 2874.50 2946.54 2869.11 1.030000e+09 0.0247 977.62 968.86 977.62 960.52 1.700000e+09 0.0095
1 2021-01-05 1 01 2990.57 2943.67 2990.57 2921.84 1.520000e+09 0.0157 985.76 976.43 985.76 965.53 1.810000e+09 0.0083
2 2021-01-06 2 01 2968.21 2993.34 3027.16 2961.37 1.790000e+09 -0.0075 981.39 987.25 990.88 977.37 1.980000e+09 -0.0044
3 2021-01-07 3 01 3031.68 2980.75 3055.28 2980.75 1.520000e+09 0.0214 988.86 983.28 993.91 982.27 2.260000e+09 0.0076
4 2021-01-08 4 01 3152.18 3040.11 3161.11 3040.11 1.300000e+09 0.0397 987.79 990.70 995.22 978.12 2.560000e+09 -0.0011
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
242 2021-12-08 2 49 3001.80 3017.93 3036.13 2995.34 4.882500e+08 0.0034 1006.04 1006.61 1012.64 1003.92 1.070000e+09 0.0094
243 2021-12-09 3 49 3029.57 3007.00 3029.57 3001.55 5.134400e+08 0.0093 1022.87 1009.29 1022.87 1009.29 1.060000e+09 0.0167
244 2021-12-10 4 49 3010.23 3008.70 3017.64 2998.29 4.516000e+08 -0.0064 1011.57 1016.34 1018.42 1010.02 1.150000e+09 -0.0110
245 2021-12-13 0 50 3001.66 3019.67 3043.83 3000.51 3.758300e+08 -0.0028 1005.96 1014.27 1014.90 1005.96 1.340000e+09 -0.0055
246 2021-12-14 1 50 2987.95 2983.95 3001.70 2976.16 5.815700e+08 -0.0046 1002.81 1001.11 1007.73 996.85 1.080000e+09 -0.0031

247 rows × 15 columns

  • DOW 정보 추가
DJI = fdr.DataReader('dji', start_date, end_date).reset_index()
DJI.name = 'dji'
col_rename(DJI)
Date dji_Close dji_Open dji_High dji_Low dji_Volume dji_Change
0 2021-01-04 30223.89 30627.47 30674.28 29881.82 476730000.0 -0.0125
1 2021-01-05 30391.60 30204.25 30504.89 30141.78 350910000.0 0.0055
2 2021-01-06 30829.40 30362.78 31022.65 30313.07 500430000.0 0.0144
3 2021-01-07 31041.13 30901.18 31193.40 30897.86 430620000.0 0.0069
4 2021-01-08 31097.97 31069.58 31140.67 30793.27 385650000.0 0.0018
... ... ... ... ... ... ... ...
235 2021-12-08 35754.09 35716.85 35839.72 35603.95 363110000.0 0.0009
236 2021-12-09 35755.28 35722.26 35864.22 35579.91 335370000.0 0.0000
237 2021-12-10 35971.98 35830.55 35982.69 35711.90 344050000.0 0.0061
238 2021-12-13 35652.07 35906.91 35951.28 35610.42 419860000.0 -0.0089
239 2021-12-14 35545.69 35605.73 35779.20 35441.74 414320000.0 -0.0030

240 rows × 7 columns

data = pd.merge(data, DJI, how = 'outer')
  • 원/달러 환율 정보 추가
EXC = fdr.DataReader('USD/KRW', start_date, end_date).reset_index()
EXC.name = 'exc'
col_rename(EXC)
Date exc_Close exc_Open exc_High exc_Low exc_Change
0 2021-01-04 1086.48 1085.73 1088.30 1080.02 0.0016
1 2021-01-05 1086.42 1086.69 1090.33 1082.04 -0.0001
2 2021-01-06 1087.93 1087.40 1089.79 1083.91 0.0014
3 2021-01-07 1094.28 1088.03 1096.78 1085.42 0.0058
4 2021-01-08 1092.93 1094.35 1099.21 1088.79 -0.0012
... ... ... ... ... ... ...
242 2021-12-08 1175.19 1176.85 1179.31 1175.12 -0.0013
243 2021-12-09 1178.15 1173.42 1179.37 1172.62 0.0025
244 2021-12-10 1180.86 1178.27 1182.82 1176.40 0.0023
245 2021-12-13 1184.91 1180.96 1186.16 1177.03 0.0034
246 2021-12-14 1185.19 1185.10 1186.38 1180.97 0.0002

247 rows × 6 columns

data = pd.merge(data, EXC, how = 'outer')
data.columns
Index(['Date', 'weekday', 'weeknum', 'kospi_Close', 'kospi_Open', 'kospi_High',
       'kospi_Low', 'kospi_Volume', 'kospi_Change', 'kosdaq_Close',
       'kosdaq_Open', 'kosdaq_High', 'kosdaq_Low', 'kosdaq_Volume',
       'kosdaq_Change', 'dji_Close', 'dji_Open', 'dji_High', 'dji_Low',
       'dji_Volume', 'dji_Change', 'exc_Close', 'exc_Open', 'exc_High',
       'exc_Low', 'exc_Change'],
      dtype='object')
data
Date weekday weeknum kospi_Close kospi_Open kospi_High kospi_Low kospi_Volume kospi_Change kosdaq_Close ... dji_Open dji_High dji_Low dji_Volume dji_Change exc_Close exc_Open exc_High exc_Low exc_Change
0 2021-01-04 0 01 2944.45 2874.50 2946.54 2869.11 1.030000e+09 0.0247 977.62 ... 30627.47 30674.28 29881.82 476730000.0 -0.0125 1086.48 1085.73 1088.30 1080.02 0.0016
1 2021-01-05 1 01 2990.57 2943.67 2990.57 2921.84 1.520000e+09 0.0157 985.76 ... 30204.25 30504.89 30141.78 350910000.0 0.0055 1086.42 1086.69 1090.33 1082.04 -0.0001
2 2021-01-06 2 01 2968.21 2993.34 3027.16 2961.37 1.790000e+09 -0.0075 981.39 ... 30362.78 31022.65 30313.07 500430000.0 0.0144 1087.93 1087.40 1089.79 1083.91 0.0014
3 2021-01-07 3 01 3031.68 2980.75 3055.28 2980.75 1.520000e+09 0.0214 988.86 ... 30901.18 31193.40 30897.86 430620000.0 0.0069 1094.28 1088.03 1096.78 1085.42 0.0058
4 2021-01-08 4 01 3152.18 3040.11 3161.11 3040.11 1.300000e+09 0.0397 987.79 ... 31069.58 31140.67 30793.27 385650000.0 0.0018 1092.93 1094.35 1099.21 1088.79 -0.0012
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
242 2021-12-08 2 49 3001.80 3017.93 3036.13 2995.34 4.882500e+08 0.0034 1006.04 ... 35716.85 35839.72 35603.95 363110000.0 0.0009 1175.19 1176.85 1179.31 1175.12 -0.0013
243 2021-12-09 3 49 3029.57 3007.00 3029.57 3001.55 5.134400e+08 0.0093 1022.87 ... 35722.26 35864.22 35579.91 335370000.0 0.0000 1178.15 1173.42 1179.37 1172.62 0.0025
244 2021-12-10 4 49 3010.23 3008.70 3017.64 2998.29 4.516000e+08 -0.0064 1011.57 ... 35830.55 35982.69 35711.90 344050000.0 0.0061 1180.86 1178.27 1182.82 1176.40 0.0023
245 2021-12-13 0 50 3001.66 3019.67 3043.83 3000.51 3.758300e+08 -0.0028 1005.96 ... 35906.91 35951.28 35610.42 419860000.0 -0.0089 1184.91 1180.96 1186.16 1177.03 0.0034
246 2021-12-14 1 50 2987.95 2983.95 3001.70 2976.16 5.815700e+08 -0.0046 1002.81 ... 35605.73 35779.20 35441.74 414320000.0 -0.0030 1185.19 1185.10 1186.38 1180.97 0.0002

247 rows × 26 columns

# data[['kospi_Close', 'kospi_Open', 'kospi_High',
#        'kospi_Low', 'kospi_Volume', 'kospi_Change', 'kosdaq_Close',
#        'kosdaq_Open', 'kosdaq_High', 'kosdaq_Low', 'kosdaq_Volume',
#        'kosdaq_Change', 'dji_Close', 'dji_Open', 'dji_High', 'dji_Low',
#        'dji_Volume', 'dji_Change', 'exc_Close', 'exc_Open', 'exc_High',
#        'exc_Low', 'exc_Change']] = data[['kospi_Close', 'kospi_Open', 'kospi_High',
#        'kospi_Low', 'kospi_Volume', 'kospi_Change', 'kosdaq_Close',
#        'kosdaq_Open', 'kosdaq_High', 'kosdaq_Low', 'kosdaq_Volume',
#        'kosdaq_Change', 'dji_Close', 'dji_Open', 'dji_High', 'dji_Low',
#        'dji_Volume', 'dji_Change', 'exc_Close', 'exc_Open', 'exc_High',
#        'exc_Low', 'exc_Change']].shift(5)
data.isnull().sum()
Date              0
weekday           0
weeknum           0
kospi_Close      11
kospi_Open       11
kospi_High       11
kospi_Low        11
kospi_Volume     11
kospi_Change     11
kosdaq_Close     11
kosdaq_Open      11
kosdaq_High      11
kosdaq_Low       11
kosdaq_Volume    11
kosdaq_Change    11
dji_Close         7
dji_Open          7
dji_High          7
dji_Low           7
dji_Volume        7
dji_Change        7
exc_Close         0
exc_Open          0
exc_High          0
exc_Low           0
exc_Change        0
dtype: int64
  • 미국 휴장일은 전일 데이터로 fill up
dji_nan = data[data['dji_Open'].isnull()].index
data.columns
Index(['Date', 'weekday', 'weeknum', 'kospi_Close', 'kospi_Open', 'kospi_High',
       'kospi_Low', 'kospi_Volume', 'kospi_Change', 'kosdaq_Close',
       'kosdaq_Open', 'kosdaq_High', 'kosdaq_Low', 'kosdaq_Volume',
       'kosdaq_Change', 'dji_Close', 'dji_Open', 'dji_High', 'dji_Low',
       'dji_Volume', 'dji_Change', 'exc_Close', 'exc_Open', 'exc_High',
       'exc_Low', 'exc_Change'],
      dtype='object')
for i in dji_nan:
    for j in range(6,12):
        n = data.iloc[i-1,-j]
        data.iloc[i,-j] = n
  • 미국 지수의 경우 전일 시가는 24:00전에 알 수 있지만 다른정보는 24:00 이전에 알 수 없으므로 하루를 밀어줌.
data[['dji_Close', 'dji_High', 'dji_Low',
       'dji_Volume', 'dji_Change']] = data[['dji_Close', 'dji_High', 'dji_Low',
       'dji_Volume', 'dji_Change']].shift(1)
  • 한국 휴장 일은 data 에서 삭제.
kospi_nan = data[data['kospi_Open'].isnull()].index
exc_nan = data[data['exc_Open'].isnull()].index
nan_list = set(kospi_nan.to_list() + exc_nan.to_list())
data.drop(nan_list, inplace=True)
data.drop(0, axis=0, inplace=True)
shift_col = data.columns.drop(['Date', 'weekday', 'weeknum'])
data[shift_col] = data[shift_col].shift(5)
data = data[5:].reset_index(drop=True)
data.isnull().sum()
Date             0
weekday          0
weeknum          0
kospi_Close      0
kospi_Open       0
kospi_High       0
kospi_Low        0
kospi_Volume     0
kospi_Change     0
kosdaq_Close     0
kosdaq_Open      0
kosdaq_High      0
kosdaq_Low       0
kosdaq_Volume    0
kosdaq_Change    0
dji_Close        0
dji_Open         0
dji_High         0
dji_Low          0
dji_Volume       0
dji_Change       0
exc_Close        0
exc_Open         0
exc_High         0
exc_Low          0
exc_Change       0
dtype: int64
ms_data = data.copy()
data['weekday'] = data['weekday'].astype('str')
dummies = pd.get_dummies(data[['weekday', 'weeknum']], prefix=['weekday','weeknum'])
m_data = pd.concat([data, dummies], axis=1)
m_data.drop(['weekday','weeknum'], axis=1, inplace=True)
m_data
Date kospi_Close kospi_Open kospi_High kospi_Low kospi_Volume kospi_Change kosdaq_Close kosdaq_Open kosdaq_High ... weeknum_41 weeknum_42 weeknum_43 weeknum_44 weeknum_45 weeknum_46 weeknum_47 weeknum_48 weeknum_49 weeknum_50
0 2021-01-12 2990.57 2943.67 2990.57 2921.84 1.520000e+09 0.0157 985.76 976.43 985.76 ... 0 0 0 0 0 0 0 0 0 0
1 2021-01-13 2968.21 2993.34 3027.16 2961.37 1.790000e+09 -0.0075 981.39 987.25 990.88 ... 0 0 0 0 0 0 0 0 0 0
2 2021-01-14 3031.68 2980.75 3055.28 2980.75 1.520000e+09 0.0214 988.86 983.28 993.91 ... 0 0 0 0 0 0 0 0 0 0
3 2021-01-15 3152.18 3040.11 3161.11 3040.11 1.300000e+09 0.0397 987.79 990.70 995.22 ... 0 0 0 0 0 0 0 0 0 0
4 2021-01-18 3148.45 3161.90 3266.23 3096.19 1.710000e+09 -0.0012 976.63 988.38 993.20 ... 0 0 0 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
225 2021-12-08 2899.72 2860.12 2905.74 2837.03 5.639300e+08 0.0214 977.15 969.90 977.73 ... 0 0 0 0 0 0 0 0 1 0
226 2021-12-09 2945.27 2874.64 2945.27 2874.64 5.344600e+08 0.0157 977.43 967.34 978.16 ... 0 0 0 0 0 0 0 0 1 0
227 2021-12-10 2968.33 2935.93 2975.44 2927.55 4.867500e+08 0.0078 998.47 981.65 998.49 ... 0 0 0 0 0 0 0 0 1 0
228 2021-12-13 2973.25 2954.82 2983.50 2932.49 4.799100e+08 0.0017 991.87 990.07 994.15 ... 0 0 0 0 0 0 0 0 0 1
229 2021-12-14 2991.72 2973.84 2992.31 2960.90 5.413700e+08 0.0062 996.64 996.89 999.52 ... 0 0 0 0 0 0 0 0 0 1

230 rows × 78 columns

옵션/선물 만기일

# https://search.naver.com/search.naver?where=nexearch&sm=tab_etc&qvt=0&query=%EC%98%B5%EC%85%98%EB%A7%8C%EA%B8%B0%EC%9D%BC
f_option = ['2021-01-14', '2021-02-10', '2021-03-11', '2021-04-08', '2021-05-13', '2021-06-10', 
            '2021-07-08', '2021-08-12', '2021-09-09', '2021-10-14', '2021-11-11', '2021-12-09']
f_futures = ['2021-03-11', '2021-06-10', '2021-09-09', '2021-12-09']

전체 모델링

sample_name = 'sample_submission.csv'
sample_submission = pd.read_csv(os.path.join(path, sample_name))
submission = sample_submission.set_index('Day')
from xgboost import XGBRegressor
xgb = XGBRegressor()
day_list = ['2021-11-29', '2021-11-30', '2021-12-01', '2021-12-02', '2021-12-03']
stock_list.drop(stock_list[stock_list['종목코드']=='031390'].index,inplace=True)
for code in tqdm(stock_list['종목코드'].values):
    for day in day_list:
        code_data = fdr.DataReader(code, start = start_date, end = end_date).reset_index()
        code_data['y_Close'] = code_data['Close']    
        
        code_data[[
            'Open', 'High', 'Low', 'Close', 'Volume', 'Change']] = code_data[[
            'Open', 'High', 'Low', 'Close', 'Volume', 'Change']].shift(5)

        #이동 평균선 추가 (5/10/20)
        code_data['5_close'] = code_data['Close'].rolling(window=5).mean()   
        code_data['10_close'] = code_data['Close'].rolling(window=10).mean()   
        code_data['20_close'] = code_data['Close'].rolling(window=20).mean()   
        
        #6일전/7일전 종가 추가
        code_data['6d_Close'] = code_data[['Close']].shift(1)
        code_data['7d_Close'] = code_data[['6d_Close']].shift(1)
        
        #6일전/7일전 종가와 전일 종가의 차이 비율 추가
        code_data['diff_Close'] = (code_data['Close'] - code_data['6d_Close'])/code_data['Close']
        code_data['diff_Close2'] = (code_data['Close'] - code_data['7d_Close'])/code_data['Close']

        
        #6일전 거래량 추가
        code_data['6d_Volume'] = code_data[['Volume']].shift(6)
        
        #2일전 거래량과 전일 거래량의 차이 추가
        code_data['diff_Volume'] = code_data['Volume'] - code_data['6d_Volume']
               
        df = pd.merge(m_data, code_data, how='left', on='Date')
        df = df.drop(df[df.isnull().any(axis=1)].index, axis=0)
       
        market = stock_list[stock_list['종목코드'].str.contains(code)]['상장시장'].values[0]
       
        if market == 'KOSPI':
            df_kospi = df.drop(['kosdaq_Close', 'kosdaq_Open', 'kosdaq_High', 'kosdaq_Low', 'kosdaq_Volume', 'kosdaq_Change'], axis=1)
            x_train = df_kospi[df_kospi['Date'] < day].drop(['Date','y_Close'], axis=1)
            y_train = df_kospi[df_kospi['Date'] < day]['y_Close']
            xgb.fit(x_train, y_train)
            pred = xgb.predict(df_kospi[df_kospi['Date']==day].drop(['Date','y_Close'], axis=1))
        else:
            df_kosdaq = df.drop(['kospi_Close', 'kospi_Open', 'kospi_High', 'kospi_Low','kospi_Volume', 'kospi_Change', 'kosdaq_Close', 'kosdaq_Open'], axis=1)
            x_train = df_kosdaq[df_kosdaq['Date'] < day].drop(['Date','y_Close'], axis=1)
            y_train = df_kosdaq[df_kosdaq['Date'] < day]['y_Close']
            xgb.fit(x_train, y_train)
            pred = xgb.predict(df_kosdaq[df_kosdaq['Date']==day].drop(['Date','y_Close'], axis=1))
        submission.loc[day,code] = pred
100%|████████████████████████████████████████████████████████████████████████████████| 369/369 [06:12<00:00,  1.01s/it]
# privite submission만 데이터셋으로 작성
submission = submission.iloc[5:,:]
submission
000060 000080 000100 000120 000150 000240 000250 000270 000660 000670 ... 330860 336260 336370 347860 348150 348210 352820 357780 363280 950130
Day
2021-11-29 29417.746094 33234.343750 60132.703125 136056.812500 119314.929688 16740.277344 45884.242188 82530.968750 107088.679688 683745.0625 ... 46994.058594 49025.132812 89673.695312 37659.074219 19519.048828 50946.542969 372958.59375 265210.156250 25597.248047 16075.208008
2021-11-30 30002.962891 30610.638672 59678.476562 129827.921875 117337.351562 15388.730469 44593.121094 79988.164062 114570.687500 653516.1875 ... 43664.417969 50585.960938 97681.562500 35288.453125 20594.652344 50690.828125 379092.87500 264948.375000 23948.753906 16622.246094
2021-12-01 29913.453125 29738.423828 58936.699219 125720.078125 116199.320312 15408.984375 45524.351562 79979.945312 115808.617188 643901.2500 ... 44753.972656 49181.937500 97690.156250 36902.507812 19222.310547 46054.750000 367844.65625 259253.796875 23623.125000 17566.416016
2021-12-02 30087.794922 30333.685547 57824.656250 126370.695312 114069.851562 15501.014648 44904.937500 80623.687500 116014.000000 648058.5000 ... 46721.980469 49454.167969 100551.687500 36566.273438 20361.396484 48312.214844 375545.75000 272749.937500 24086.357422 16994.531250
2021-12-03 31241.271484 29993.458984 60009.445312 128049.921875 116856.796875 16200.601562 44051.550781 80780.773438 115700.054688 662439.3750 ... 48972.601562 46961.933594 98532.125000 37583.468750 21500.310547 49715.628906 343600.87500 269958.593750 25039.224609 17610.996094

5 rows × 370 columns

실제 종가와 비교

true_close = sample_submission.copy().set_index('Day')

code_data = fdr.DataReader('000060', start = '2021-11-01', end = '2021-11-01')['Close'].reset_index()
for code in tqdm(stock_list['종목코드'].values):
    for day in day_list:
        code_data = fdr.DataReader(code, start = day, end = day)['Close'].reset_index()
        true_close.loc[day,code] = code_data['Close'].values[0]
100%|████████████████████████████████████████████████████████████████████████████████| 369/369 [02:19<00:00,  2.65it/s]
true_close = true_close.iloc[5:,:]
true_close
000060 000080 000100 000120 000150 000240 000250 000270 000660 000670 ... 330860 336260 336370 347860 348150 348210 352820 357780 363280 950130
Day
2021-11-29 31200.0 30300.0 58900.0 129000.0 110500.0 15200.0 43250.0 79200.0 116000.0 641000.0 ... 43750.0 48750.0 100500.0 37100.0 19800.0 49100.0 369000.0 266300.0 24200.0 17650.0
2021-11-30 31300.0 29000.0 57800.0 124000.0 108500.0 15150.0 42150.0 77800.0 114000.0 632000.0 ... 43300.0 49250.0 97800.0 35550.0 18600.0 45500.0 364500.0 255800.0 23100.0 19500.0
2021-12-01 31700.0 29400.0 57700.0 125000.0 112000.0 15500.0 42750.0 81200.0 116500.0 630000.0 ... 48950.0 48700.0 98500.0 36050.0 18900.0 46200.0 352500.0 264200.0 23850.0 18650.0
2021-12-02 32150.0 29550.0 60100.0 129000.0 110000.0 16150.0 43100.0 81600.0 120000.0 639000.0 ... 51900.0 46250.0 95100.0 33800.0 18650.0 48550.0 330000.0 274700.0 25200.0 18050.0
2021-12-03 32700.0 30600.0 60300.0 131000.0 108500.0 16400.0 44900.0 82500.0 118000.0 642000.0 ... 51900.0 46800.0 95300.0 34500.0 19100.0 49000.0 354500.0 275900.0 25800.0 18150.0

5 rows × 370 columns

diff = np.mean(np.abs(submission-true_close)/true_close)
np.mean(np.mean(np.abs(submission[:5] - true_close[:5])/true_close[:5]))*100
4.453063727451483

댓글남기기