Several stocks in the China A-share market

myStocks: get data

Get stock prices using the tidyquant package

# Download the price history of the 18 selected tickers for the requested
# date window, then group the rows by ticker symbol.
myStocks <- tq_get(
  c(
    "601318.SS", "601899.SS", "601998.SS", "600600.SS", "600016.SS",
    "600332.SS", "601992.SS", "600085.SS", "601811.SS", "600754.SS",
    "601111.SS", "600585.SS", "000063.SZ", "002594.SZ", "000921.SZ",
    "002202.SZ", "002672.SZ", "000338.SZ"
  ),
  get  = "stock.prices",
  from = "2015-01-01",
  to   = "2020-09-30"
)
myStocks <- group_by(myStocks, symbol)

# Print a compact column-by-column overview of the resulting data frame
glimpse(myStocks)
## Rows: 24,792
## Columns: 8
## Groups: symbol [18]
## $ symbol   <chr> "601318.SS", "601318.SS", "601318.SS", "601318.SS", "601318.…
## $ date     <date> 2015-01-05, 2015-01-06, 2015-01-07, 2015-01-08, 2015-01-09,…
## $ open     <dbl> 38.8, 37.2, 36.7, 37.2, 35.6, 37.0, 37.7, 37.6, 37.0, 39.7, …
## $ high     <dbl> 39.1, 38.4, 37.8, 37.5, 39.1, 38.1, 38.5, 37.9, 39.7, 40.7, …
## $ low      <dbl> 37.6, 36.0, 36.2, 35.4, 35.4, 36.2, 37.0, 36.5, 36.8, 39.0, …
## $ close    <dbl> 38.1, 36.9, 36.7, 35.5, 36.4, 37.6, 37.6, 36.9, 39.4, 39.1, …
## $ volume   <dbl> 4.87e+08, 4.68e+08, 3.41e+08, 3.58e+08, 6.24e+08, 5.31e+08, …
## $ adjusted <dbl> 34.0, 32.9, 32.8, 31.8, 32.5, 33.6, 33.6, 33.0, 35.2, 35.0, …
# Lookup table pairing each company name with its ticker symbol
name <- data.frame(
  name = c(
    "Ping An Insurance",
    "Zijin Mining Group Ltd",
    "China CITIC Bank",
    "Tsingtao Brewery",
    "China Minsheng Bank",
    "Guangzhou Baiyunshan Pharmaceutical Holdings",
    "BBMG Corporation Ltd",
    "Tong Ren Tang",
    "Xinhua Winshare Publishing and Media Co., Ltd",
    "Shanghai Jin Jiang International Hotels",
    "Air China",
    "Anhui Conch Cement",
    "ZTE Corporation",
    "BYD Company",
    "Hisense Kelon Electrical Holdings Company Limited",
    "Xinjiang Goldwind Science & Technology Co., Ltd",
    "Dongjiang Environmental",
    "Weichai Power"
  ),
  symbol = c(
    "601318.SS", "601899.SS", "601998.SS", "600600.SS", "600016.SS",
    "600332.SS", "601992.SS", "600085.SS", "601811.SS", "600754.SS",
    "601111.SS", "600585.SS", "000063.SZ", "002594.SZ", "000921.SZ",
    "002202.SZ", "002672.SZ", "000338.SZ"
  )
)

# Turn the company names into an ordered factor whose level order is the
# order in which the names are listed above
name$name <- ordered(name$name, levels = name$name)

# Attach the company name to every price row, matching on the ticker symbol
myStocks <- left_join(myStocks, name, by = "symbol")

Transforming the data to include daily, monthly and yearly returns

# Daily log returns per stock, computed from the adjusted price.
# The grouping by symbol is stated explicitly so that each stock is
# processed on its own regardless of how myStocks was grouped upstream.
myStocks_returns_daily <- myStocks %>%
  group_by(symbol) %>%
  tq_transmute(select     = adjusted,
               mutate_fun = periodReturn,
               period     = "daily",
               type       = "log",
               col_rename = "daily.returns") %>%
  # re-attach the company name for each symbol
  left_join(name, by = "symbol")

# Monthly arithmetic returns per stock, computed from the adjusted price.
# The grouping by symbol is stated explicitly so that each stock is
# processed on its own regardless of how myStocks was grouped upstream.
myStocks_returns_monthly <- myStocks %>%
  group_by(symbol) %>%
  tq_transmute(select     = adjusted,
               mutate_fun = periodReturn,
               period     = "monthly",
               type       = "arithmetic",
               col_rename = "monthly.returns") %>%
  # re-attach the company name for each symbol
  left_join(name, by = "symbol")

# Yearly arithmetic returns per stock, computed from the adjusted price.
# Each stock (symbol) is processed as its own group.
myStocks_returns_annual <- myStocks %>%
  group_by(symbol) %>%
  tq_transmute(select     = adjusted,
               mutate_fun = periodReturn,
               period     = "yearly",
               type       = "arithmetic",
               col_rename = "yearly.returns") %>%
  # re-attach the company name for each symbol
  left_join(name, by = "symbol")

Creating a new dataframe for summary statistics for monthly stock returns

# Summary statistics of the monthly returns, one row per stock.
# NOTE: summarise() relies on myStocks_returns_monthly still being grouped
# by symbol when it arrives here.
Summary_monthly_returns <- myStocks_returns_monthly %>%
  summarise(
    Min                = min(monthly.returns),
    Max                = max(monthly.returns),
    Median             = median(monthly.returns),
    Mean               = mean(monthly.returns),
    Standard_Deviation = sd(monthly.returns)
  ) %>%
  # bring the company name back and move it to the first column
  left_join(name, by = "symbol") %>%
  select(name, everything())

# Print the summary table
Summary_monthly_returns
## # A tibble: 18 x 7
##    name                 symbol     Min   Max   Median      Mean Standard_Deviat…
##    <ord>                <chr>    <dbl> <dbl>    <dbl>     <dbl>            <dbl>
##  1 ZTE Corporation      000063… -0.584 0.482  0.0245    2.33e-2           0.150 
##  2 Weichai Power        000338… -0.291 0.240  0.0144    1.82e-2           0.0960
##  3 Hisense Kelon Elect… 000921… -0.234 0.450 -0.0141    1.68e-2           0.142 
##  4 Xinjiang Goldwind S… 002202… -0.297 0.396 -0.00894   1.01e-2           0.131 
##  5 BYD Company          002594… -0.273 0.409 -0.00267   2.22e-2           0.127 
##  6 Dongjiang Environme… 002672… -0.310 0.288 -0.00413   1.73e-4           0.106 
##  7 China Minsheng Bank  600016… -0.114 0.117 -0.00351  -3.06e-3           0.0490
##  8 Tong Ren Tang        600085… -0.348 0.678 -0.00719   1.00e-2           0.119 
##  9 Guangzhou Baiyunsha… 600332… -0.301 0.255  0.00216   6.99e-3           0.0963
## 10 Anhui Conch Cement   600585… -0.230 0.199  0.0127    1.98e-2           0.0887
## 11 Tsingtao Brewery     600600… -0.221 0.302  0.0122    1.38e-2           0.103 
## 12 Shanghai Jin Jiang … 600754… -0.360 0.422  0.00376   1.61e-2           0.125 
## 13 Air China            601111… -0.341 0.384  0.0181    6.33e-3           0.130 
## 14 Ping An Insurance    601318… -0.181 0.188  0.0124    1.46e-2           0.0748
## 15 Xinhua Winshare Pub… 601811… -0.250 2.11  -0.0147    2.70e-2           0.315 
## 16 Zijin Mining Group … 601899… -0.265 0.354 -0.0111    1.60e-2           0.117 
## 17 BBMG Corporation Ltd 601992… -0.188 0.734 -0.0199   -4.31e-5           0.125 
## 18 China CITIC Bank     601998… -0.231 0.165 -0.0119   -1.69e-3           0.0706

Comparing the distribution of monthly returns between various stocks

# Density curve of the monthly returns, drawn in a separate panel per company
myStocks_returns_monthly %>%
  ggplot(aes(x = monthly.returns)) +
  geom_density() +
  facet_wrap(~name) +
  labs(
    title = "Charting the Distribution of Monthly Returns",
    x     = "Monthly Returns",
    y     = "Density"
  ) +
  theme_igray()

Plots whose peaks are low and spread out over a wide range indicate riskier stocks, since their monthly returns tend to fluctuate more often between negative and positive values and are less likely to be similar over long periods of time. Plots whose peak is high and located near positive returns indicate less risky stocks. Air China appears to be the riskiest, followed by Hisense Kelon Electrical Holdings. Banks appear to be the least risky.

Analyzing the risk vs return

# Scatter plot of risk (standard deviation of monthly returns) against
# expected return (mean monthly return), one labelled point per stock.
ggplot(Summary_monthly_returns, aes(x = Standard_Deviation, y = Mean)) +
  geom_point() +
  # map the label from the plotted data itself so labels always stay aligned
  # with their points, even if the data frame is filtered or reordered
  ggrepel::geom_text_repel(aes(label = symbol), position = "identity") +
  theme_solarized() +
  labs(x = "Risk",
       y = "Expected Monthly Return",
       title = "Charting the Risk vs. Expected Return of stocks")

Boxplot of annual returns

# Boxplot of the yearly returns of each stock, ordered by median yearly return
myStocks_returns_annual %>%
  group_by(name) %>%
  mutate(median_return = median(yearly.returns)) %>%
  # the per-stock median is computed; drop the grouping before plotting
  ungroup() %>%

  # arrange stocks by median yearly return; reorder() sorts ascending, so after
  # coord_flip() the stock with the highest median return appears at the top
  ggplot(aes(x = reorder(name, median_return), y = yearly.returns, colour = name)) +
  geom_boxplot() +
  coord_flip() +
  labs(x = "Stock",
       y = "Returns",
       title = "Boxplot of Annual Returns") +
  # accuracy = 1 rounds the axis labels to whole percentages
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  # hide the colour legend: the stock names are already on the axis
  guides(colour = "none") +
  theme_bw() +
  NULL

Bar plot that shows return for each stock on a year-by-year basis

# Side-by-side bars of the yearly return of every stock, grouped by calendar year
myStocks_returns_annual %>%
  ggplot(aes(x = year(date), y = yearly.returns, fill = name)) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Annual Returns",
    x     = "Year",
    y     = "Returns"
  ) +
  # keep the fill legend but drop its title
  guides(fill = guide_legend(title = NULL)) +
  theme_bw()