Let’s load the survey dataset from the MASS package.

library(MASS)
# Copy the `survey` data frame that ships with MASS into the workspace,
# then display it.
data("survey", package = "MASS")
print(survey)
##       Sex Wr.Hnd NW.Hnd W.Hnd    Fold Pulse    Clap Exer Smoke Height      M.I    Age
## 1  Female   18.5   18.0 Right  R on L    92    Left Some Never 173.00   Metric 18.250
## 2    Male   19.5   20.5  Left  R on L   104    Left None Regul 177.80 Imperial 17.583
## 3    Male   18.0   13.3 Right  L on R    87 Neither None Occas     NA     <NA> 16.917
## 4    Male   18.8   18.9 Right  R on L    NA Neither None Never 160.00   Metric 20.333
## 5    Male   20.0   20.0 Right Neither    35   Right Some Never 165.00   Metric 23.667
## 6  Female   18.0   17.7 Right  L on R    64   Right Some Never 172.72 Imperial 21.000
## 7    Male   17.7   17.7 Right  L on R    83   Right Freq Never 182.88 Imperial 18.833
## 8  Female   17.0   17.3 Right  R on L    74   Right Freq Never 157.00   Metric 35.833
## 9    Male   20.0   19.5 Right  R on L    72   Right Some Never 175.00   Metric 19.000
## 10   Male   18.5   18.5 Right  R on L    90   Right Some Never 167.00   Metric 22.333
## 11 Female   17.0   17.2 Right  L on R    80   Right Freq Never 156.20 Imperial 28.500
## 12   Male   21.0   21.0 Right  R on L    68    Left Freq Never     NA     <NA> 18.250
## 13 Female   16.0   16.0 Right  L on R    NA   Right Some Never 155.00   Metric 18.750
## 14 Female   19.5   20.2 Right  L on R    66 Neither Some Never 155.00   Metric 17.500
## 15   Male   16.0   15.5 Right  R on L    60   Right Some Never     NA     <NA> 17.167
## 16 Female   17.5   17.0 Right  R on L    NA   Right Freq Never 156.00   Metric 17.167
## 17 Female   18.0   18.0 Right  L on R    89 Neither Freq Never 157.00   Metric 19.333
## 18   Male   19.4   19.2  Left  R on L    74   Right Some Never 182.88 Imperial 18.333
## 19   Male   20.5   20.5 Right  L on R    NA    Left Some Never 190.50 Imperial 19.750
## 20   Male   21.0   20.9 Right  R on L    78   Right Freq Never 177.00   Metric 17.917
## 21   Male   21.5   22.0 Right  R on L    72    Left Freq Never 190.50 Imperial 17.917
## 22   Male   20.1   20.7 Right  L on R    72   Right Freq Never 180.34 Imperial 18.167
## 23   Male   18.5   18.0 Right  L on R    64   Right Freq Never 180.34 Imperial 17.833
## 24   Male   21.5   21.2 Right  R on L    62   Right Some Never 184.00   Metric 18.250
## 25 Female   17.0   17.5 Right  R on L    64    Left Some Never     NA     <NA> 19.167
## 26   Male   18.5   18.5 Right Neither    90 Neither Some Never     NA     <NA> 17.583
## 27   Male   21.0   20.7 Right  R on L    90   Right Some Never 172.72 Imperial 17.500
## 28   Male   20.8   21.4 Right  R on L    62 Neither Freq Never 175.26 Imperial 18.083
## 29   Male   17.8   17.8 Right  L on R    76 Neither Freq Never     NA     <NA> 21.917
## 30   Male   19.5   19.5 Right  L on R    79   Right Some Never 167.00   Metric 19.250
## 31 Female   18.5   18.0 Right  R on L    76   Right None Occas     NA     <NA> 41.583
## 32   Male   18.8   18.2 Right  L on R    78   Right Freq Never 180.00   Metric 17.500
## 33 Female   17.1   17.5 Right  R on L    72   Right Freq Heavy 166.40 Imperial 39.750
## 34   Male   20.1   20.0 Right  R on L    70   Right Some Never 180.00   Metric 17.167
## 35   Male   18.0   19.0 Right  L on R    54 Neither Some Regul     NA     <NA> 17.750
## 36   Male   22.2   21.0 Right  L on R    66   Right Freq Occas 190.00   Metric 18.000
## 37 Female   16.0   16.5 Right  L on R    NA   Right Some Never 168.00   Metric 19.000
## 38   Male   19.4   18.5 Right  R on L    72 Neither Freq Never 182.50   Metric 17.917
## 39   Male   22.0   22.0 Right  R on L    80   Right Some Never 185.00   Metric 35.500
## 40   Male   19.0   19.0 Right  R on L    NA Neither Freq Occas 171.00   Metric 19.917
## 41 Female   17.5   16.0 Right  L on R    NA   Right Some Never 169.00   Metric 17.500
## 42 Female   17.8   18.0 Right  R on L    72   Right Some Never 154.94 Imperial 17.083
## 43   Male     NA     NA Right  R on L    60    <NA> Some Never 172.00   Metric 28.583
## 44 Female   20.1   20.2 Right  L on R    80   Right Some Never 176.50 Imperial 17.500
## 45 Female   13.0   13.0  <NA>  L on R    70    Left Freq Never 180.34 Imperial 17.417
## 46   Male   17.0   17.5 Right  R on L    NA Neither Freq Never 180.34 Imperial 18.500
## 47   Male   23.2   22.7 Right  L on R    84    Left Freq Regul 180.00   Metric 18.917
## 48   Male   22.5   23.0 Right  R on L    96   Right None Never 170.00   Metric 19.417
## 49 Female   18.0   17.6 Right  R on L    60   Right Some Occas 168.00   Metric 18.417
## 50 Female   18.0   17.9 Right  R on L    50    Left None Never 165.00   Metric 30.750
## 51   Male   22.0   21.5  Left  R on L    55    Left Freq Never 200.00   Metric 18.500
## 52   Male   20.5   20.0 Right  L on R    68   Right Freq Never 190.00   Metric 17.500
## 53   Male   17.0   18.0 Right  L on R    78    Left Some Never 170.18 Imperial 18.333
## 54   Male   20.5   19.5 Right  L on R    56   Right Freq Never 179.00   Metric 17.417
## 55   Male   22.5   22.5 Right  R on L    65   Right Freq Regul 182.00   Metric 20.000
## 56   Male   18.5   18.5 Right  L on R    NA Neither Freq Never 171.00   Metric 18.333
## 57 Female   15.5   15.4 Right  R on L    70 Neither None Never 157.48 Imperial 17.167
## 58   Male   19.5   19.7 Right  R on L    72   Right Freq Never     NA     <NA> 17.417
## 59   Male   19.5   19.0 Right  L on R    62   Right Freq Never 177.80 Imperial 17.667
## 60   Male   20.6   21.0  Left  L on R    NA    Left Freq Occas 175.26 Imperial 18.417
## 61   Male   22.8   23.2 Right  R on L    66 Neither Freq Never 187.00   Metric 20.333
## 62 Female   18.5   18.2 Right  R on L    72 Neither Freq Never 167.64 Imperial 17.333
## 63 Female   19.6   19.7 Right  L on R    70   Right Freq Never 178.00   Metric 17.500
## 64 Female   18.7   18.0  Left  L on R    NA    Left None Never 170.00   Metric 19.833
## 65 Female   17.3   18.0 Right  L on R    64 Neither Freq Never 164.00   Metric 18.583
## 66   Male   19.5   19.8 Right Neither    NA   Right Freq Never 183.00   Metric 18.000
## 67 Female   19.0   19.1 Right  L on R    NA Neither Freq Never 172.00   Metric 30.667
## 68 Female   18.5   18.0 Right  R on L    64   Right Freq Never     NA     <NA> 16.917
## 69   Male   19.0   19.0 Right  L on R    NA   Right Some Never 180.00   Metric 19.917
## 70   Male   21.0   19.5 Right  L on R    80    Left None  <NA>     NA     <NA> 18.333
## 71 Female   18.0   17.5 Right  L on R    64    Left Freq Never 170.00   Metric 17.583
## 72   Male   19.4   19.5 Right  R on L    NA   Right Freq Heavy 176.00   Metric 17.833
## 73 Female   17.0   16.6 Right  R on L    68   Right Some Never 171.00   Metric 17.667
## 74 Female   16.5   17.0 Right  L on R    40    Left Freq Never 167.64 Imperial 17.417
## 75 Female   15.6   15.8 Right  R on L    88    Left Some Never 165.00   Metric 17.750
## 76 Female   17.5   17.5 Right Neither    68   Right Freq Heavy 170.00   Metric 20.667
## 77 Female   17.0   17.6 Right  L on R    76   Right Some Never 165.00   Metric 23.583
## 78 Female   18.6   18.0 Right  L on R    NA Neither Freq Heavy 165.10 Imperial 17.167
## 79 Female   18.3   18.5 Right  R on L    68 Neither Some Never 165.10 Imperial 17.083
## 80   Male   20.0   20.5 Right  L on R    NA   Right Freq Never 185.42 Imperial 18.750
## 81   Male   19.5   19.5  Left  R on L    66    Left Some Never     NA     <NA> 16.750
## 82   Male   19.2   18.9 Right  R on L    76   Right Freq Never 176.50 Imperial 20.167
## 83 Female   17.5   17.5 Right  R on L    98    Left Freq Never     NA     <NA> 17.667
##  [ reached 'max' / getOption("max.print") -- omitted 154 rows ]

Goodness-of-fit

To perform a goodness-of-fit test, we compare whether the observed frequencies are similar to the expected frequencies. For example, suppose we hypothesize that 20% of people exercise frequently, 50% do some exercise, and 30% do not exercise. We will use the Exer variable to test this hypothesis.

We will use the function chisq.test(x, p), where x is a frequency table and p is a vector of expected probabilities, one for each category.

# Goodness-of-fit test: do the observed exercise frequencies match the
# hypothesised proportions (20% frequent, 30% none, 50% some)?
ex_tab <- table(survey$Exer) # observed counts for each exercise category
ex_tab # note the order of the categories.
## 
## Freq None Some 
##  115   24   98
# Expected probability for each category, keyed by category name so the
# hypothesis does not silently depend on the order of the factor levels.
exp_p <- c(Freq = 20 / 100, None = 30 / 100, Some = 50 / 100)
# Fail loudly if the hypothesis and the table do not cover the same categories.
stopifnot(setequal(names(exp_p), names(ex_tab)))
# Index by the table's own names so each probability lines up with its count.
chi <- chisq.test(ex_tab, p = exp_p[names(ex_tab)])


chi # value of the chi-square test
## 
##  Chi-squared test for given probabilities
## 
## data:  ex_tab
## X-squared = 131.16, df = 2, p-value < 2.2e-16
chi$observed # observed value (same as the input table)
## 
## Freq None Some 
##  115   24   98
chi$expected # expected value (total count times each expected probability)
##  Freq  None  Some 
##  47.4  71.1 118.5

The \(\chi^2\) value was significant, suggesting that the observed frequencies were not distributed as expected. Looking at the results, we can see that more people exercised frequently (115) than we expected (20%, or 47.4), and fewer people did no exercise (24) than we expected (30%, or 71.1).

Contingency Table or Test of Independence

Next, we would like to consider whether exercise frequency depends on (or is associated with) biological sex (Sex). The idea behind the test is that if Sex has nothing to do with exercise frequency, then the distribution of exercise should be the same for males and females (the same proportions, though not necessarily the same expected counts).

# Cross-tabulate sex (rows) against exercise frequency (columns).
se_tab <- table(survey[["Sex"]], survey[["Exer"]])
se_tab
##         
##          Freq None Some
##   Female   49   11   58
##   Male     65   13   40
# Pearson chi-square test of independence on the 2 x 3 table.
chi2 <- chisq.test(x = se_tab)
chi2
## 
##  Pearson's Chi-squared test
## 
## data:  se_tab
## X-squared = 5.7184, df = 2, p-value = 0.05731
chi2$observed
##         
##          Freq None Some
##   Female   49   11   58
##   Male     65   13   40
chi2$expected # identical rows: both sexes have the same row total (118 each)
##         
##          Freq None Some
##   Female   57   12   49
##   Male     57   12   49

The test was not significant, though it was very close (p = .057). Comparing the expected values with the observed values, we can see that more men than women seem to exercise frequently. On the other hand, among people who do some exercise, there seem to be more women than men. Although we cannot claim to have found this pattern because the \(\chi^2\) was not significant, it might be worth looking into in a future study.

jmv

In jamovi’s jmv package, we can use propTestN(), the proportion test (N outcomes), for a goodness-of-fit test.

library(jmv)
# Goodness-of-fit test via jmv. The ratios are given in the order in which
# the Exer categories appear in the output table (Freq, None, Some).
propTestN(
  data = survey,
  var = Exer, # variable to be analyzed
  ratio = c(20/100, 30/100, 50/100), # hypothesised proportions
  expected = TRUE # also show the expected counts
)
## 
##  PROPORTION TEST (N OUTCOMES)
## 
##  Proportions - Exer                               
##  ──────────────────────────────────────────────── 
##    Level                Count        Proportion   
##  ──────────────────────────────────────────────── 
##    Freq     Observed          115     0.4852321   
##             Expected     47.40000     0.2000000   
##                                                   
##    None     Observed           24     0.1012658   
##             Expected     71.10000     0.3000000   
##                                                   
##    Some     Observed           98     0.4135021   
##             Expected    118.50000     0.5000000   
##  ──────────────────────────────────────────────── 
## 
## 
##  χ² Goodness of Fit               
##  ──────────────────────────────── 
##    χ²          df    p            
##  ──────────────────────────────── 
##    131.1561     2    < .0000001   
##  ────────────────────────────────

For a contingency table, we use contTables().

# Sex-by-Exer contingency table via jmv, with expected counts shown
# alongside the observed counts.
contTables(
  data = survey,
  rows = Sex,
  cols = Exer,
  exp = TRUE # show expected values
)
## 
##  CONTINGENCY TABLES
## 
##  Contingency Tables                                                      
##  ─────────────────────────────────────────────────────────────────────── 
##    Sex                   Freq         None        Some        Total      
##  ─────────────────────────────────────────────────────────────────────── 
##    Female    Observed           49          11          58         118   
##              Expected     57.00000    12.00000    49.00000    118.0000   
##                                                                          
##    Male      Observed           65          13          40         118   
##              Expected     57.00000    12.00000    49.00000    118.0000   
##                                                                          
##    Total     Observed          114          24          98         236   
##              Expected    114.00000    24.00000    98.00000    236.0000   
##  ─────────────────────────────────────────────────────────────────────── 
## 
## 
##  χ² Tests                              
##  ───────────────────────────────────── 
##          Value       df    p           
##  ───────────────────────────────────── 
##    χ²    5.718403     2    0.0573145   
##    N          236                      
##  ─────────────────────────────────────
 

Copyright © 2022 Kris Ariyabuddhiphongs