different fixes
- change reference category for quintile from 0 to 1
- combine categories (combine quintile 0 and 5)
- drop patients with quintile 0
- drop quintile variable
change reference category
# relevel
g <- g %>% mutate(quintile_cate = as.factor(g$quintile))
g$quintile_cate <- relevel(g$quintile_cate, ref = "1") # or ref = 2 for positional
g$quintile_cate %>% levels()
## [1] "1" "0" "2" "3" "4" "5"
cox_quintile_relevel <- coxph(Surv(fu_time, death) ~ age + gender + copd + quintile_cate + ethnicgroup, data = g)
## Warning in fitter(X, Y, istrat, offset, init, control, weights = weights, :
## Loglik converged before variable 4 ; coefficient may be infinite.
summary(cox_quintile_relevel)
## Call:
## coxph(formula = Surv(fu_time, death) ~ age + gender + copd +
## quintile_cate + ethnicgroup, data = g)
##
## n= 994, number of events= 489
## (6 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 5.994e-02 1.062e+00 5.412e-03 11.074 < 2e-16 ***
## gender2 -2.872e-01 7.503e-01 9.498e-02 -3.024 0.00249 **
## copd1 1.121e-01 1.119e+00 1.066e-01 1.052 0.29273
## quintile_cate0 -1.513e+01 2.699e-07 1.208e+03 -0.013 0.99001
## quintile_cate2 -3.549e-01 7.013e-01 1.552e-01 -2.286 0.02224 *
## quintile_cate3 1.049e-01 1.111e+00 1.500e-01 0.700 0.48410
## quintile_cate4 9.765e-03 1.010e+00 1.512e-01 0.065 0.94851
## quintile_cate5 1.026e-01 1.108e+00 1.550e-01 0.662 0.50803
## ethnicgroup2 -1.250e-02 9.876e-01 3.258e-01 -0.038 0.96939
## ethnicgroup3 -8.084e-01 4.456e-01 4.164e-01 -1.942 0.05219 .
## ethnicgroup9 5.001e-01 1.649e+00 3.619e-01 1.382 0.16700
## ethnicgroup8 3.064e-02 1.031e+00 2.259e-01 0.136 0.89209
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.062e+00 9.418e-01 1.0506 1.0731
## gender2 7.503e-01 1.333e+00 0.6229 0.9039
## copd1 1.119e+00 8.939e-01 0.9078 1.3786
## quintile_cate0 2.699e-07 3.705e+06 0.0000 Inf
## quintile_cate2 7.013e-01 1.426e+00 0.5173 0.9506
## quintile_cate3 1.111e+00 9.004e-01 0.8278 1.4901
## quintile_cate4 1.010e+00 9.903e-01 0.7508 1.3581
## quintile_cate5 1.108e+00 9.025e-01 0.8178 1.5012
## ethnicgroup2 9.876e-01 1.013e+00 0.5214 1.8704
## ethnicgroup3 4.456e-01 2.244e+00 0.1970 1.0077
## ethnicgroup9 1.649e+00 6.065e-01 0.8112 3.3512
## ethnicgroup8 1.031e+00 9.698e-01 0.6623 1.6054
##
## Concordance= 0.669 (se = 0.013 )
## Likelihood ratio test= 172 on 12 df, p=<2e-16
## Wald test = 143.8 on 12 df, p=<2e-16
## Score (logrank) test = 145.2 on 12 df, p=<2e-16
- “Loglik converged before variable 4 ; coefficient may be infinite.”
- still did not converge for quintile_cate0, large SE (1.208e+03) and infinite CI
combine quintile 0 and 5
library(car)
g <- g %>% mutate(quintile_cate_0and5 = recode(quintile_cate, "c('0','5')='5'"))
table(g$quintile_cate_0and5)
##
## 1 2 3 4 5
## 138 205 211 220 220
cox_quintile_0and5 <- coxph(Surv(fu_time, death) ~ age + gender + copd + quintile_cate_0and5 + ethnicgroup, data = g)
summary(cox_quintile_0and5)
## Call:
## coxph(formula = Surv(fu_time, death) ~ age + gender + copd +
## quintile_cate_0and5 + ethnicgroup, data = g)
##
## n= 994, number of events= 489
## (6 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.060025 1.061863 0.005435 11.045 < 2e-16 ***
## gender2 -0.284075 0.752710 0.095093 -2.987 0.00281 **
## copd1 0.112661 1.119252 0.106503 1.058 0.29014
## quintile_cate_0and52 -0.352892 0.702653 0.155208 -2.274 0.02299 *
## quintile_cate_0and53 0.106042 1.111869 0.149924 0.707 0.47938
## quintile_cate_0and54 0.009889 1.009938 0.151206 0.065 0.94786
## quintile_cate_0and55 0.079240 1.082464 0.155092 0.511 0.60940
## ethnicgroup2 0.001841 1.001843 0.325814 0.006 0.99549
## ethnicgroup3 -0.795241 0.451472 0.416346 -1.910 0.05613 .
## ethnicgroup9 0.413044 1.511412 0.362126 1.141 0.25403
## ethnicgroup8 -0.005971 0.994047 0.225803 -0.026 0.97891
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.0619 0.9417 1.0506 1.0732
## gender2 0.7527 1.3285 0.6247 0.9069
## copd1 1.1193 0.8935 0.9084 1.3791
## quintile_cate_0and52 0.7027 1.4232 0.5184 0.9525
## quintile_cate_0and53 1.1119 0.8994 0.8288 1.4917
## quintile_cate_0and54 1.0099 0.9902 0.7509 1.3583
## quintile_cate_0and55 1.0825 0.9238 0.7987 1.4670
## ethnicgroup2 1.0018 0.9982 0.5290 1.8973
## ethnicgroup3 0.4515 2.2150 0.1996 1.0210
## ethnicgroup9 1.5114 0.6616 0.7433 3.0734
## ethnicgroup8 0.9940 1.0060 0.6386 1.5474
##
## Concordance= 0.667 (se = 0.013 )
## Likelihood ratio test= 166.5 on 11 df, p=<2e-16
## Wald test = 142.1 on 11 df, p=<2e-16
## Score (logrank) test = 141.9 on 11 df, p=<2e-16
- this time model converged,
- but does it make sense to combine quintile 0 and 5? probably not, these 2 sets of people are quite different
drop quintile 0 patients
# set quintile 0 to NAs, coxph will ignore these patients
g <- g %>% mutate(quintile_cate_0_NA = recode(quintile_cate, "c('0')=NA"))
table(g$quintile_cate_0_NA, exclude = NULL)
##
## 1 2 3 4 5 <NA>
## 138 205 211 220 216 10
cox_quintile_0_NA <- coxph(Surv(fu_time, death) ~ age + gender + copd + quintile_cate_0_NA + ethnicgroup, data = g)
summary(cox_quintile_0_NA)
## Call:
## coxph(formula = Surv(fu_time, death) ~ age + gender + copd +
## quintile_cate_0_NA + ethnicgroup, data = g)
##
## n= 990, number of events= 489
## (10 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.059936 1.061769 0.005412 11.074 < 2e-16 ***
## gender2 -0.287237 0.750334 0.094980 -3.024 0.00249 **
## copd1 0.112138 1.118668 0.106581 1.052 0.29273
## quintile_cate_0_NA2 -0.354865 0.701268 0.155214 -2.286 0.02224 *
## quintile_cate_0_NA3 0.104937 1.110641 0.149971 0.700 0.48410
## quintile_cate_0_NA4 0.009765 1.009813 0.151204 0.065 0.94851
## quintile_cate_0_NA5 0.102566 1.108010 0.154956 0.662 0.50803
## ethnicgroup2 -0.012504 0.987574 0.325849 -0.038 0.96939
## ethnicgroup3 -0.808391 0.445574 0.416369 -1.942 0.05219 .
## ethnicgroup9 0.500062 1.648824 0.361867 1.382 0.16700
## ethnicgroup8 0.030643 1.031117 0.225876 0.136 0.89209
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.0618 0.9418 1.0506 1.0731
## gender2 0.7503 1.3327 0.6229 0.9039
## copd1 1.1187 0.8939 0.9078 1.3786
## quintile_cate_0_NA2 0.7013 1.4260 0.5173 0.9506
## quintile_cate_0_NA3 1.1106 0.9004 0.8278 1.4901
## quintile_cate_0_NA4 1.0098 0.9903 0.7508 1.3581
## quintile_cate_0_NA5 1.1080 0.9025 0.8178 1.5012
## ethnicgroup2 0.9876 1.0126 0.5214 1.8704
## ethnicgroup3 0.4456 2.2443 0.1970 1.0077
## ethnicgroup9 1.6488 0.6065 0.8112 3.3512
## ethnicgroup8 1.0311 0.9698 0.6623 1.6054
##
## Concordance= 0.667 (se = 0.013 )
## Likelihood ratio test= 167.4 on 11 df, p=<2e-16
## Wald test = 143.8 on 11 df, p=<2e-16
## Score (logrank) test = 142.8 on 11 df, p=<2e-16
- without lumping quintile 0 with 5, now quintile 5 HR has gone from 1.0825 to 1.1080
- seems even adding 4 patients rom quintile 0 changed HR of quintile 5
drop quintile variable
cox_no_quintile <- coxph(Surv(fu_time, death) ~ age + gender + copd + ethnicgroup, data = g)
summary(cox_no_quintile)
## Call:
## coxph(formula = Surv(fu_time, death) ~ age + gender + copd +
## ethnicgroup, data = g)
##
## n= 1000, number of events= 492
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.058859 1.060625 0.005394 10.911 < 2e-16 ***
## gender2 -0.247823 0.780498 0.094199 -2.631 0.00852 **
## copd1 0.123913 1.131917 0.103791 1.194 0.23253
## ethnicgroup2 0.092533 1.096949 0.320932 0.288 0.77310
## ethnicgroup3 -0.753938 0.470510 0.413700 -1.822 0.06839 .
## ethnicgroup9 0.502383 1.652655 0.359743 1.397 0.16256
## ethnicgroup8 -0.077047 0.925846 0.224993 -0.342 0.73202
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.0606 0.9428 1.0495 1.0719
## gender2 0.7805 1.2812 0.6489 0.9388
## copd1 1.1319 0.8835 0.9236 1.3873
## ethnicgroup2 1.0969 0.9116 0.5848 2.0576
## ethnicgroup3 0.4705 2.1254 0.2091 1.0585
## ethnicgroup9 1.6527 0.6051 0.8165 3.3450
## ethnicgroup8 0.9258 1.0801 0.5957 1.4390
##
## Concordance= 0.66 (se = 0.013 )
## Likelihood ratio test= 153.9 on 7 df, p=<2e-16
## Wald test = 129.8 on 7 df, p=<2e-16
## Score (logrank) test = 128.6 on 7 df, p=<2e-16
- sometimes dropping the whole variable might be the best/safest options
- but in this case since only 4 patients with quintile 0, dropping those patients is best