R Markdown - this is where explanations go.

This is an example of how to use Association Rule Mining in R. The dataset used here is from Kumar’s Data Mining (2005). It can be found on Dr. G’s shared area, along with the code: https://drive.google.com/drive/folders/1QdLLYoVj-eLK1f_9KA0VxvtwAgWca9Lj?usp=sharing

NOTE: There are also methods for including interactive graphs, but they require Shiny.

library(viridis)
## Loading required package: viridisLite
library(arules)
## Loading required package: Matrix
## 
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
library(TSP)
library(data.table)
library(tcltk)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:arules':
## 
##     intersect, recode, setdiff, setequal, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(devtools)
library(purrr)
## 
## Attaching package: 'purrr'
## The following object is masked from 'package:data.table':
## 
##     transpose
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:Matrix':
## 
##     expand
library(arulesViz)
## Loading required package: grid
# Switch to the folder that holds the grocery CSV.
# NOTE(review): this absolute path is machine-specific; change it to your own
# data folder before running.
setwd("C:/Users/profa/Documents/R/RStudioFolder_1")

# Load the CSV as an arules transactions object. In "single" format each row
# is one (transaction ID, item) pair; cols names the ID column and the item
# column, in that order.
Foods <- read.transactions(
  "KumarGroceriesTransData.csv",
  format = "single",
  sep = ",",
  cols = c(1, 2),
  rm.duplicates = FALSE
)

The data looks like this:

# Print each transaction's item set alongside its transaction ID.
inspect(Foods)
##     items                    transactionID
## [1] {Bread,Coke,Milk}        1            
## [2] {Beer,Bread}             2            
## [3] {Beer,Coke,Diaper,Milk}  3            
## [4] {Beer,Bread,Diaper,Milk} 4            
## [5] {Coke,Diaper,Milk}       5
# Mine association rules with the Apriori algorithm, keeping only rules with
# a minimum support of 0.35 and a minimum confidence of 0.5.
rules <- apriori(
  Foods,
  parameter = list(support = 0.35, confidence = 0.5)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.5    0.1    1 none FALSE            TRUE       5    0.35      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 1 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[5 item(s), 5 transaction(s)] done [0.00s].
## sorting and recoding items ... [5 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [25 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].

Look at the rules and the item frequency plot

# List every mined rule with its support, confidence, lift and count.
inspect(rules)
##      lhs              rhs      support confidence lift      count
## [1]  {}            => {Bread}  0.6     0.6000000  1.0000000 3    
## [2]  {}            => {Coke}   0.6     0.6000000  1.0000000 3    
## [3]  {}            => {Beer}   0.6     0.6000000  1.0000000 3    
## [4]  {}            => {Diaper} 0.6     0.6000000  1.0000000 3    
## [5]  {}            => {Milk}   0.8     0.8000000  1.0000000 4    
## [6]  {Bread}       => {Beer}   0.4     0.6666667  1.1111111 2    
## [7]  {Beer}        => {Bread}  0.4     0.6666667  1.1111111 2    
## [8]  {Bread}       => {Milk}   0.4     0.6666667  0.8333333 2    
## [9]  {Milk}        => {Bread}  0.4     0.5000000  0.8333333 2    
## [10] {Coke}        => {Diaper} 0.4     0.6666667  1.1111111 2    
## [11] {Diaper}      => {Coke}   0.4     0.6666667  1.1111111 2    
## [12] {Coke}        => {Milk}   0.6     1.0000000  1.2500000 3    
## [13] {Milk}        => {Coke}   0.6     0.7500000  1.2500000 3    
## [14] {Beer}        => {Diaper} 0.4     0.6666667  1.1111111 2    
## [15] {Diaper}      => {Beer}   0.4     0.6666667  1.1111111 2    
## [16] {Beer}        => {Milk}   0.4     0.6666667  0.8333333 2    
## [17] {Milk}        => {Beer}   0.4     0.5000000  0.8333333 2    
## [18] {Diaper}      => {Milk}   0.6     1.0000000  1.2500000 3    
## [19] {Milk}        => {Diaper} 0.6     0.7500000  1.2500000 3    
## [20] {Coke,Diaper} => {Milk}   0.4     1.0000000  1.2500000 2    
## [21] {Coke,Milk}   => {Diaper} 0.4     0.6666667  1.1111111 2    
## [22] {Diaper,Milk} => {Coke}   0.4     0.6666667  1.1111111 2    
## [23] {Beer,Diaper} => {Milk}   0.4     1.0000000  1.2500000 2    
## [24] {Beer,Milk}   => {Diaper} 0.4     1.0000000  1.6666667 2    
## [25] {Diaper,Milk} => {Beer}   0.4     0.6666667  1.1111111 2
# Bar chart of absolute item counts, limited to the 20 most frequent items.
itemFrequencyPlot(Foods, type = "absolute", topN = 20)

# Order the rules from highest to lowest confidence.
SortedRules <- sort(rules, decreasing = TRUE, by = "confidence")

Sorted rules and rules summary

# Show the seven highest-confidence rules. head() returns at most 7 rules, so
# this still works when fewer rules were mined, unlike indexing with [1:7].
inspect(head(SortedRules, n = 7))
##     lhs              rhs      support confidence lift     count
## [1] {Coke}        => {Milk}   0.6     1.00       1.250000 3    
## [2] {Diaper}      => {Milk}   0.6     1.00       1.250000 3    
## [3] {Coke,Diaper} => {Milk}   0.4     1.00       1.250000 2    
## [4] {Beer,Diaper} => {Milk}   0.4     1.00       1.250000 2    
## [5] {Beer,Milk}   => {Diaper} 0.4     1.00       1.666667 2    
## [6] {}            => {Milk}   0.8     0.80       1.000000 4    
## [7] {Milk}        => {Coke}   0.6     0.75       1.250000 3
# Summarise the rule set: rule length distribution and quality measures.
summary(SortedRules)
## set of 25 rules
## 
## rule length distribution (lhs + rhs):sizes
##  1  2  3 
##  5 14  6 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    2.00    2.00    2.04    2.00    3.00 
## 
## summary of quality measures:
##     support       confidence          lift            count    
##  Min.   :0.40   Min.   :0.5000   Min.   :0.8333   Min.   :2.0  
##  1st Qu.:0.40   1st Qu.:0.6667   1st Qu.:1.0000   1st Qu.:2.0  
##  Median :0.40   Median :0.6667   Median :1.1111   Median :2.0  
##  Mean   :0.48   Mean   :0.7213   Mean   :1.1000   Mean   :2.4  
##  3rd Qu.:0.60   3rd Qu.:0.7500   3rd Qu.:1.2500   3rd Qu.:3.0  
##  Max.   :0.80   Max.   :1.0000   Max.   :1.6667   Max.   :4.0  
## 
## mining info:
##   data ntransactions support confidence
##  Foods             5    0.35        0.5

Selecting or targeting specific rules

# Target rules that predict Beer: the right-hand side is fixed to "Beer" and
# all other items may appear only on the left-hand side. minlen = 2 requires
# at least one item on the left-hand side (rule length counts lhs + rhs).
BeerRules <- apriori(
  data = Foods,
  parameter = list(support = 0.001, confidence = 0.01, minlen = 2),
  appearance = list(default = "lhs", rhs = "Beer"),
  control = list(verbose = FALSE)
)

# Rank the Beer rules from highest to lowest confidence.
BeerRules <- sort(BeerRules, decreasing = TRUE, by = "confidence")

# Show the top four rules. head() returns at most 4 rules, so this still works
# when the targeted search finds fewer, unlike indexing with [1:4].
inspect(head(BeerRules, n = 4))
##     lhs                    rhs    support confidence lift     count
## [1] {Bread,Diaper}      => {Beer} 0.2     1.0000000  1.666667 1    
## [2] {Bread,Diaper,Milk} => {Beer} 0.2     1.0000000  1.666667 1    
## [3] {Bread}             => {Beer} 0.4     0.6666667  1.111111 2    
## [4] {Diaper}            => {Beer} 0.4     0.6666667  1.111111 2

Sorting rules by lift

# Keep the ten rules with the highest lift and plot them.
subrules <- head(sort(SortedRules, by = "lift"), n = 10)
plot(subrules)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.