The R package R6causal
implements an R6 class called
SCM
. The class aims to simplify working with structural
causal models. The missing data mechanism can be defined as a part of
the structural model.
The class contains methods for
igraph
or qgraph
causaleffect
and dosearch
dosearch
cfid
In addition, there are functions for
The class ParallelWorld
inherits SCM
and
defines a structural causal model that describes parallel worlds for
counterfactual inference.
The class LinearGaussianSCM
inherits SCM
and defines a structural causal model where all functions are linear and
all background variables follow Gaussian distribution.
Structural causal model (SCM) for a backdoor situation can be defined as follows
backdoor <- SCM$new("backdoor",
uflist = list(
uz = function(n) {return(runif(n))},
ux = function(n) {return(runif(n))},
uy = function(n) {return(runif(n))}
),
vflist = list(
z = function(uz) {
return(as.numeric(uz < 0.4))},
x = function(ux, z) {
return(as.numeric(ux < 0.2 + 0.5*z))},
y = function(uy, z, x) {
return(as.numeric(uy < 0.1 + 0.4*z + 0.4*x))}
)
)
A shortcut notation for this is
backdoor_text <- SCM$new("backdoor",
uflist = list(
uz = "n : runif(n)",
ux = "n : runif(n)",
uy = "n : runif(n)"
),
vflist = list(
z = "uz : as.numeric(uz < 0.4)",
x = "ux, z : as.numeric(ux < 0.2 + 0.5*z)",
y = "uy, z, x : as.numeric(uy < 0.1 + 0.4*z + 0.4*x)"
)
)
Alternatively the functions of SCM can be specified via conditional probability tables
backdoor_condprob <- SCM$new("backdoor",
uflist = list(
uz = function(n) {return(runif(n))},
ux = function(n) {return(runif(n))},
uy = function(n) {return(runif(n))}
),
vflist = list(
z = function(uz) {
return( generate_condprob( ycondx = data.table(z = c(0,1),
prob = c(0.6,0.4)),
x = data.table(uz = uz),
Umerge_expr = "uz"))},
x = function(ux, z) {
return( generate_condprob( ycondx = data.table(x = c(0,1,0,1),
z = c(0,0,1,1),
prob = c(0.8,0.2,0.3,0.7)),
x = data.table(z = z, ux = ux),
Umerge_expr = "ux"))},
y = function(uy, z, x) {
return( generate_condprob( ycondx = data.table(y= rep(c(0,1), 4),
z = c(0,0,1,1,0,0,1,1),
x = c(0,0,0,0,1,1,1,1),
prob = c(0.9,0.1,0.5,0.5,
0.5,0.5,0.1,0.9)),
x = data.table(z = z, x = x, uy = uy),
Umerge_expr = "uy"))}
)
)
It is possible to mix the styles and define some elements of a function list as functions, some as text and some as conditional probability tables.
A linear Gaussian SCM can be defined giving the coefficients for the structural equations:
lgbackdoor <- LinearGaussianSCM$new("Linear Gaussian Backdoor",
linear_gaussian = list(
uflist = list(ux = function(n) {rnorm(n)},
uy = function(n) {rnorm(n)},
uz = function(n) {rnorm(n)}),
vnames = c("x","y","z"),
vcoefmatrix = matrix(c(0,0.4,0,0,0,0,0.6,0.8,0),3,3),
ucoefvector = c(1,1,1),
ccoefvector = c(0,0,0)))
print(lgbackdoor)
#> Name of the model: Linear Gaussian Backdoor
#>
#> Graph:
#> z -> x
#> x -> y
#> z -> y
#>
#> Functions of background (exogenous) variables:
#>
#> $ux
#> function (n)
#> {
#> rnorm(n)
#> }
#>
#> $uy
#> function (n)
#> {
#> rnorm(n)
#> }
#>
#> $uz
#> function (n)
#> {
#> rnorm(n)
#> }
#>
#> Functions of endogenous variables:
#>
#> $x
#> function (z, ux)
#> {
#> return(0 + 0.6 * z + 1 * ux)
#> }
#> <environment: 0x56308a8a8ae8>
#>
#> $y
#> function (x, z, uy)
#> {
#> return(0 + 0.4 * x + 0.8 * z + 1 * uy)
#> }
#> <environment: 0x56308a8afae0>
#>
#> $z
#> function (uz)
#> {
#> return(0 + 1 * uz)
#> }
#> <environment: 0x56308a976920>
#>
#> Topological order of endogenous variables:
#> [1] "z" "x" "y"
#>
#> No missing data mechanism
It is also possible to generate the underlying DAG and the coefficients randomly:
randomlg <- LinearGaussianSCM$new("Random Linear Gaussian",
random_linear_gaussian = list(
nv = 6,
edgeprob=0.5,
vcoefdistr = function(n) {rnorm(n)},
ccoefdistr = function(n) {rnorm(n)},
ucoefdistr = function(n) {rnorm(n)}))
print(randomlg)
#> Name of the model: Random Linear Gaussian
#>
#> Graph:
#> v5 -> v2
#> v1 -> v3
#> v5 -> v3
#> v2 -> v4
#> v3 -> v4
#> v5 -> v4
#> v1 -> v6
#> v3 -> v6
#> v5 -> v6
#>
#> Functions of background (exogenous) variables:
#>
#> $u1
#> function (n)
#> {
#> return(rnorm(n))
#> }
#> <environment: 0x563089b280e0>
#>
#> $u2
#> function (n)
#> {
#> return(rnorm(n))
#> }
#> <environment: 0x563089b2c038>
#>
#> $u3
#> function (n)
#> {
#> return(rnorm(n))
#> }
#> <environment: 0x563089b32a88>
#>
#> $u4
#> function (n)
#> {
#> return(rnorm(n))
#> }
#> <environment: 0x563089b3a240>
#>
#> $u5
#> function (n)
#> {
#> return(rnorm(n))
#> }
#> <environment: 0x563089b464e0>
#>
#> $u6
#> function (n)
#> {
#> return(rnorm(n))
#> }
#> <environment: 0x563089b768f0>
#>
#> Functions of endogenous variables:
#>
#> $v1
#> function (u1)
#> {
#> return(0.681573565839132 + -0.978310596202718 * u1)
#> }
#> <environment: 0x563089b84ea0>
#>
#> $v2
#> function (v5, u2)
#> {
#> return(-0.539636883464147 + 0.161372893025512 * v5 + -1.18669890605783 *
#> u2)
#> }
#> <environment: 0x563089b8f690>
#>
#> $v3
#> function (v1, v5, u3)
#> {
#> return(-0.758929875508947 + 1.06867119983214 * v1 + 0.567601179602566 *
#> v5 + -0.444741633115858 * u3)
#> }
#> <environment: 0x563089ba8490>
#>
#> $v4
#> function (v2, v3, v5, u4)
#> {
#> return(1.3736839992632 + -0.0226138892177697 * v2 + -1.78882769338673 *
#> v3 + -2.01585602152545 * v5 + -0.838821998963166 * u4)
#> }
#> <environment: 0x563089bb4dd0>
#>
#> $v5
#> function (u5)
#> {
#> return(-1.20814212339904 + -2.12291108091737 * u5)
#> }
#> <environment: 0x563089bb99a0>
#>
#> $v6
#> function (v1, v3, v5, u6)
#> {
#> return(1.34804406447274 + 0.725756598210448 * v1 + -0.295320820943068 *
#> v3 + -0.00392479823937809 * v5 + 0.249216057235405 *
#> u6)
#> }
#> <environment: 0x563089bddb38>
#>
#> Topological order of endogenous variables:
#> [1] "v1" "v5" "v2" "v3" "v4" "v6"
#>
#> No missing data mechanism
The print method presents the basic information on the model
backdoor
#> Name of the model: backdoor
#>
#> Graph:
#> z -> x
#> z -> y
#> x -> y
#>
#> Functions of background (exogenous) variables:
#>
#> $uz
#> function (n)
#> {
#> return(runif(n))
#> }
#>
#> $ux
#> function (n)
#> {
#> return(runif(n))
#> }
#>
#> $uy
#> function (n)
#> {
#> return(runif(n))
#> }
#>
#> Functions of endogenous variables:
#>
#> $z
#> function (uz)
#> {
#> return(as.numeric(uz < 0.4))
#> }
#>
#> $x
#> function (ux, z)
#> {
#> return(as.numeric(ux < 0.2 + 0.5 * z))
#> }
#>
#> $y
#> function (uy, z, x)
#> {
#> return(as.numeric(uy < 0.1 + 0.4 * z + 0.4 * x))
#> }
#>
#> Topological order of endogenous variables:
#> [1] "z" "x" "y"
#>
#> No missing data mechanism
The plotting method of the package igraph
is used by
default. If qgraph
is available, its plotting method can be
used as well. The argument subset
controls which variables
are plotted. Plotting parameters are passed to the plotting method.
Calling method simulate()
creates or updates data table
simdata
.
backdoor$simulate(10)
backdoor$simdata
#> uz ux uy z x y
#> <num> <num> <num> <num> <num> <num>
#> 1: 0.7716561 0.02252543 0.2931901 0 1 1
#> 2: 0.5047347 0.94368786 0.6561158 0 0 0
#> 3: 0.7333874 0.62080321 0.8647546 0 0 0
#> 4: 0.3914142 0.36495808 0.5431471 1 1 1
#> 5: 0.7626372 0.98810075 0.2713148 0 0 0
#> 6: 0.1975466 0.30713831 0.5511298 1 1 1
#> 7: 0.8049719 0.69844768 0.7255618 0 0 0
#> 8: 0.1273272 0.78697258 0.9733634 1 0 0
#> 9: 0.3454195 0.80688074 0.7580293 1 0 0
#> 10: 0.6735979 0.05822425 0.6381636 0 1 0
backdoor$simulate(8)
backdoor$simdata
#> uz ux uy z x y
#> <num> <num> <num> <num> <num> <num>
#> 1: 0.89901177 0.79716519 0.9365685 0 0 0
#> 2: 0.63735309 0.08051453 0.9542498 0 1 0
#> 3: 0.18444537 0.87411865 0.6822645 1 0 0
#> 4: 0.15791302 0.31394589 0.3650648 1 1 1
#> 5: 0.82549982 0.62083165 0.3585194 0 0 0
#> 6: 0.09477592 0.93400992 0.4145894 1 0 1
#> 7: 0.65409445 0.47578808 0.1401896 0 0 0
#> 8: 0.42810760 0.99589091 0.9718430 0 0 0
backdoor_text$simulate(20)
backdoor_condprob$simulate(30)
In an intervention, the structural equation of the target variable is changed.
backdoor_x1 <- backdoor$clone() # making a copy
backdoor_x1$intervene("x",1) # applying the intervention
backdoor_x1$plot(method = "qgraph") # to see that arrows incoming to x are cut
backdoor_x1$simulate(10) # simulating from the intervened model
backdoor_x1$simdata
#> uz ux uy z x y
#> <num> <num> <num> <num> <num> <num>
#> 1: 0.07391049 0.09526901 0.06229025 1 1 1
#> 2: 0.43141420 0.89806142 0.63704758 0 1 0
#> 3: 0.19025091 0.84264320 0.08102701 1 1 1
#> 4: 0.55525942 0.98831177 0.90708223 0 1 0
#> 5: 0.25260991 0.68646564 0.06425487 1 1 1
#> 6: 0.55022347 0.67159205 0.46306804 0 1 1
#> 7: 0.68798869 0.11419704 0.20843855 0 1 1
#> 8: 0.58564014 0.95274952 0.31108398 0 1 1
#> 9: 0.75678850 0.78308089 0.25721599 0 1 1
#> 10: 0.45766613 0.41642950 0.20678951 0 1 1
The function run_experiment
applies a set of
interventions, simulates data and collects the results.
backdoor_experiment <- run_experiment(backdoor,
intervene = list(x = c(0,1)),
response = "y",
n = 10000)
str(backdoor_experiment)
#> List of 2
#> $ interventions:Classes 'data.table' and 'data.frame': 2 obs. of 1 variable:
#> ..$ x: num [1:2] 0 1
#> ..- attr(*, ".internal.selfref")=<externalptr>
#> ..- attr(*, "sorted")= chr "x"
#> $ response_list:List of 1
#> ..$ y:Classes 'data.table' and 'data.frame': 10000 obs. of 2 variables:
#> .. ..$ V1: num [1:10000] 0 1 0 0 1 0 0 1 0 1 ...
#> .. ..$ V2: num [1:10000] 1 1 0 0 1 1 1 1 0 1 ...
#> .. ..- attr(*, ".internal.selfref")=<externalptr>
colMeans(backdoor_experiment$response_list$y)
#> V1 V2
#> 0.2567 0.6650
There are direct plugins to R packages causaleffect
,
dosearch
and cfid
that can be used to solve
identifiability problems.
backdoor$causal.effect(y = "y", x = "x")
#> [1] "\\sum_{z}P(y|z,x)P(z)"
backdoor$dosearch(data = "p(x,y,z)", query = "p(y|do(x))")
#> \sum_{z}\left(p(z)p(y|x,z)\right)
backdoor$cfid(gamma = cfid::conj(cfid::cf("Y",0), cfid::cf("X",0, c(Z=1))) )
#> The query P(y /\ x_{z'}) is not identifiable from P_*.
Let us assume that intervention do(X=0) was applied and the response Y = 0 was recorded. What is the probability that in this situation the intervention do(X=1) would have led to the response Y = 1? We estimate this probability by means of simulation.
cfdata <- counterfactual(backdoor, situation = list(do = list(target = "x", ifunction = 0),
condition = data.table( x = 0, y = 0)),
target = "x", ifunction = 1, n = 100000,
method = "rejection")
mean(cfdata$y)
#> [1] 0.5387
The result differs from P(Y = 1 | do(X = 1))
Parallel world graphs (a generalization of a twin graph) are used for
counterfactual inference with several counterfactual interventions . The
package implements class ParallelWorld
which heritates
class SCM
. A ParallelWorld
object is created
from an SCM
object by specifying the interventions for each
world. By default the variables of the parallel worlds are named with
suffixes “_1”, “_2”, …
In the example below, we have the original world (variables
x
, z
, y
) and its two variants. In
the variant 1 (variables x_1
, z_1
,
y_1
), the value of x
(variable
x_1
in the object) is set to be 0. In the variant 2
(variables x_2
, z_2
, y_2
), the
value of x
(variable x_2
in the object) is set
to be 0 and the value of z
(variable z_2
in
the object) is set to be 1.
backdoor_parallel <- ParallelWorld$new(
backdoor,
dolist=list(
list(target = "x",
ifunction = 0),
list(target = list("z","x"),
ifunction = list(1,0))
)
)
backdoor_parallel
#> Name of the model: backdoor
#>
#> Graph:
#> uz -> z
#> z -> x
#> uy -> y
#> z -> y
#> x -> y
#> uz -> z_1
#> uy -> y_1
#> z_1 -> y_1
#> x_1 -> y_1
#> uy -> y_2
#> z_2 -> y_2
#> x_2 -> y_2
#>
#> Functions of background (exogenous) variables:
#>
#> $uz
#> function (n)
#> {
#> return(runif(n))
#> }
#> <bytecode: 0x5630960293f8>
#>
#> $ux
#> function (n)
#> {
#> return(runif(n))
#> }
#> <bytecode: 0x5630960293f8>
#>
#> $uy
#> function (n)
#> {
#> return(runif(n))
#> }
#> <bytecode: 0x5630960293f8>
#>
#> Functions of endogenous variables:
#>
#> $z
#> function (uz)
#> {
#> return(as.numeric(uz < 0.4))
#> }
#> <bytecode: 0x563096185828>
#>
#> $x
#> function (ux, z)
#> {
#> return(as.numeric(ux < 0.2 + 0.5 * z))
#> }
#> <bytecode: 0x5630962b5ad8>
#>
#> $y
#> function (uy, z, x)
#> {
#> return(as.numeric(uy < 0.1 + 0.4 * z + 0.4 * x))
#> }
#> <bytecode: 0x563096447ba8>
#>
#> $z_1
#> function (uz)
#> {
#> return(as.numeric(uz < 0.4))
#> }
#>
#> $x_1
#> function (...)
#> {
#> return(constant)
#> }
#> <environment: 0x563097268848>
#>
#> $y_1
#> function (uy, z_1, x_1)
#> {
#> return(as.numeric(uy < 0.1 + 0.4 * z_1 + 0.4 * x_1))
#> }
#>
#> $z_2
#> function (...)
#> {
#> return(constant)
#> }
#> <environment: 0x563095f903e0>
#>
#> $x_2
#> function (...)
#> {
#> return(constant)
#> }
#> <environment: 0x563095f91640>
#>
#> $y_2
#> function (uy, z_2, x_2)
#> {
#> return(as.numeric(uy < 0.1 + 0.4 * z_2 + 0.4 * x_2))
#> }
#>
#> Topological order of endogenous variables:
#> [1] "x_1" "z_2" "x_2" "z" "z_1" "y_2" "x" "y_1" "y"
#>
#> No missing data mechanism
if (requireNamespace("qgraph", quietly = TRUE)) backdoor_parallel$plot(method = "qgraph")
Counterfactual data can be simulated with function
counterfactual
. In the example below, we know that variable
y
obtained value 0 in the original world as well as
variants 1 and 2. We are interested in the counterfactual distribution
of y
if x
had been set to 1.
cfdata <- counterfactual(backdoor_parallel,
situation = list(
do = NULL,
condition = data.table::data.table( y = 0, y_1 = 0, y_2 = 0)),
target = "x",
ifunction = 1,
n = 100000,
method = "rejection")
mean(cfdata$y)
#> [1] 0.12315
The printed value is a simulation based estimate for the counterfactual probability P(Y = 1).
An alternative way for answering the same question defines the case of interest as one of the parallel worlds (here variant 3).
backdoor_parallel2 <- ParallelWorld$new(
backdoor,
dolist=list(
list(target = "x",
ifunction = 0),
list(target = list("z","x"),
ifunction = list(1,0)),
list(target = "x",
ifunction = 1)
)
)
cfdata <- counterfactual(backdoor_parallel2,
situation = list(
do = NULL,
condition = data.table::data.table( y = 0, y_1 = 0, y_2 = 0)),
n = 100000,
method = "rejection")
mean(cfdata$y_3)
#> [1] 0.123
The printed value is a simulation based estimate for the counterfactual probability P(Y = 1).
The missing data mechanism is defined in similar manner as the other variables.
backdoor_md <- SCM$new("backdoor_md",
uflist = list(
uz = "n : runif(n)",
ux = "n : runif(n)",
uy = "n : runif(n)",
urz = "n : runif(n)",
urx = "n : runif(n)",
ury = "n : runif(n)"
),
vflist = list(
z = "uz : as.numeric(uz < 0.4)",
x = "ux, z : as.numeric(ux < 0.2 + 0.5*z)",
y = "uy, z, x : as.numeric(uy < 0.1 + 0.4*z + 0.4*x)"
),
rflist = list(
z = "urz : as.numeric( urz < 0.9)",
x = "urx, z : as.numeric( (urx + z)/2 < 0.9)",
y = "ury, z : as.numeric( (ury + z)/2 < 0.9)"
),
rprefix = "r_"
)
By default both complete data and incomplete data are simulated. The
incomplete dataset is named as $simdata_obs
.
backdoor_md$simulate(100)
summary(backdoor_md$simdata)
#> uz ux uy urz
#> Min. :0.006664 Min. :0.0000038 Min. :0.0007247 Min. :0.001461
#> 1st Qu.:0.213442 1st Qu.:0.2169075 1st Qu.:0.2137850 1st Qu.:0.272059
#> Median :0.558049 Median :0.4655827 Median :0.4613274 Median :0.496028
#> Mean :0.507460 Mean :0.4808888 Mean :0.4648733 Mean :0.511054
#> 3rd Qu.:0.788276 3rd Qu.:0.7923157 3rd Qu.:0.6731222 3rd Qu.:0.801602
#> Max. :0.985634 Max. :0.9973702 Max. :0.9810837 Max. :0.999791
#>
#> urx ury z x
#> Min. :0.001249 Min. :0.01333 Min. :0.00 Min. :0.00
#> 1st Qu.:0.276473 1st Qu.:0.23933 1st Qu.:0.00 1st Qu.:0.00
#> Median :0.477809 Median :0.47345 Median :0.00 Median :0.00
#> Mean :0.510921 Mean :0.49177 Mean :0.38 Mean :0.42
#> 3rd Qu.:0.743408 3rd Qu.:0.74844 3rd Qu.:1.00 3rd Qu.:1.00
#> Max. :0.990001 Max. :0.99373 Max. :1.00 Max. :1.00
#>
#> y z_md x_md y_md
#> Min. :0.0 Min. :0.0000 Min. :0.0000 Min. :0.0000
#> 1st Qu.:0.0 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
#> Median :0.0 Median :0.0000 Median :0.0000 Median :0.0000
#> Mean :0.4 Mean :0.3908 Mean :0.3846 Mean :0.3736
#> 3rd Qu.:1.0 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
#> Max. :1.0 Max. :1.0000 Max. :1.0000 Max. :1.0000
#> NA's :13 NA's :9 NA's :9
#> r_z r_x r_y
#> Min. :0.00 Min. :0.00 Min. :0.00
#> 1st Qu.:1.00 1st Qu.:1.00 1st Qu.:1.00
#> Median :1.00 Median :1.00 Median :1.00
#> Mean :0.87 Mean :0.91 Mean :0.91
#> 3rd Qu.:1.00 3rd Qu.:1.00 3rd Qu.:1.00
#> Max. :1.00 Max. :1.00 Max. :1.00
#>
summary(backdoor_md$simdata_obs)
#> z_md x_md y_md r_z
#> Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00
#> 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.00
#> Median :0.0000 Median :0.0000 Median :0.0000 Median :1.00
#> Mean :0.3908 Mean :0.3846 Mean :0.3736 Mean :0.87
#> 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.00
#> Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00
#> NA's :13 NA's :9 NA's :9
#> r_x r_y
#> Min. :0.00 Min. :0.00
#> 1st Qu.:1.00 1st Qu.:1.00
#> Median :1.00 Median :1.00
#> Mean :0.91 Mean :0.91
#> 3rd Qu.:1.00 3rd Qu.:1.00
#> Max. :1.00 Max. :1.00
#>
By using the argument fixedvars
one can keep the
complete data unchanged and re-simulate the missing data mechanism.
backdoor_md$simulate(100, fixedvars = c("x","y","z","ux","uy","uz"))
summary(backdoor_md$simdata)
#> uz ux uy urz
#> Min. :0.006664 Min. :0.0000038 Min. :0.0007247 Min. :0.008648
#> 1st Qu.:0.213442 1st Qu.:0.2169075 1st Qu.:0.2137850 1st Qu.:0.284136
#> Median :0.558049 Median :0.4655827 Median :0.4613274 Median :0.504228
#> Mean :0.507460 Mean :0.4808888 Mean :0.4648733 Mean :0.506655
#> 3rd Qu.:0.788276 3rd Qu.:0.7923157 3rd Qu.:0.6731222 3rd Qu.:0.746119
#> Max. :0.985634 Max. :0.9973702 Max. :0.9810837 Max. :0.992762
#>
#> urx ury z x
#> Min. :0.0181 Min. :0.0000425 Min. :0.00 Min. :0.00
#> 1st Qu.:0.2464 1st Qu.:0.2131338 1st Qu.:0.00 1st Qu.:0.00
#> Median :0.5167 Median :0.4137118 Median :0.00 Median :0.00
#> Mean :0.5150 Mean :0.4637942 Mean :0.38 Mean :0.42
#> 3rd Qu.:0.7725 3rd Qu.:0.6953849 3rd Qu.:1.00 3rd Qu.:1.00
#> Max. :0.9835 Max. :0.9992177 Max. :1.00 Max. :1.00
#>
#> y z_md x_md y_md r_z
#> Min. :0.0 Min. :0.0 Min. :0.0000 Min. :0.0000 Min. :0.0
#> 1st Qu.:0.0 1st Qu.:0.0 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.0
#> Median :0.0 Median :0.0 Median :0.0000 Median :0.0000 Median :1.0
#> Mean :0.4 Mean :0.4 Mean :0.4043 Mean :0.3936 Mean :0.9
#> 3rd Qu.:1.0 3rd Qu.:1.0 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0
#> Max. :1.0 Max. :1.0 Max. :1.0000 Max. :1.0000 Max. :1.0
#> NA's :10 NA's :6 NA's :6
#> r_x r_y
#> Min. :0.00 Min. :0.00
#> 1st Qu.:1.00 1st Qu.:1.00
#> Median :1.00 Median :1.00
#> Mean :0.94 Mean :0.94
#> 3rd Qu.:1.00 3rd Qu.:1.00
#> Max. :1.00 Max. :1.00
#>
summary(backdoor_md$simdata_obs)
#> z_md x_md y_md r_z r_x
#> Min. :0.0 Min. :0.0000 Min. :0.0000 Min. :0.0 Min. :0.00
#> 1st Qu.:0.0 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.0 1st Qu.:1.00
#> Median :0.0 Median :0.0000 Median :0.0000 Median :1.0 Median :1.00
#> Mean :0.4 Mean :0.4043 Mean :0.3936 Mean :0.9 Mean :0.94
#> 3rd Qu.:1.0 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0 3rd Qu.:1.00
#> Max. :1.0 Max. :1.0000 Max. :1.0000 Max. :1.0 Max. :1.00
#> NA's :10 NA's :6 NA's :6
#> r_y
#> Min. :0.00
#> 1st Qu.:1.00
#> Median :1.00
#> Mean :0.94
#> 3rd Qu.:1.00
#> Max. :1.00
#>
backdoor_md$dosearch(data = "p(x*,y*,z*,r_x,r_y,r_z)", query = "p(y|do(x))")
#> \sum_{z}\left(\frac{p(z,r_z = 1)}{p(r_z = 1)}p(y|z,r_z = 1,x,r_x = 1,r_y = 1)\right)
It is automatically recognized that the problem is a missing data
problem when rflist != NULL
.