Bayesian Additive Regression Trees Action Set

Provides actions for fitting Bayesian additive regression trees models.

bartProbit Action

Fits probit Bayesian additive regression trees (BART) models to binary distributed response data.

CASL Syntax

bart.bartProbit <result=results> <status=rc> /
alpha=double,
applyRowOrder=TRUE | FALSE,
attributes={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
class={{
descending=TRUE | FALSE,
order="FORMATTED" | "FREQ" | "FREQFORMATTED" | "FREQINTERNAL" | "INTERNAL",
ref="FIRST" | "LAST" | double | "string",
required parameter vars={"variable-name-1" <, "variable-name-2", ...>}
}, {...}},
differences={{
required parameter evtMargin="string",
label="string",
name="string",
required parameter refMargin="string"
}, {...}},
display={
caseSensitive=TRUE | FALSE,
exclude=TRUE | FALSE,
excludeAll=TRUE | FALSE,
keyIsPath=TRUE | FALSE,
names={"string-1" <, "string-2", ...>},
pathType="LABEL" | "NAME",
traceNames=TRUE | FALSE
},
freq="variable-name",
inputs={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
leafSigmaK=double,
margins={{
at={{
required parameter value="string" | double,
required parameter var="string"
}, {...}},
label="string",
required parameter name="string"
}, {...}},
maxTrainTime=double,
minLeafSize=integer,
model={
depVars={{
name="variable-name"
}, {...}},
effects={{
required parameter vars={"string-1" <, "string-2", ...>}
}, {...}}
},
nBI=integer,
nBins=integer,
nMC=integer,
nominals={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
nThin=integer,
nTree=integer,
obsLeafMapInMem=TRUE | FALSE,
offset="variable-name",
orderSplit=integer,
output={
alpha=double,
avgOnly=TRUE | FALSE,
required parameter casOut={
caslib="string"
compress=TRUE | FALSE
indexVars={"variable-name-1" <, "variable-name-2", ...>}
label="string"
lifetime=64-bit-integer
maxMemSize=64-bit-integer
memoryFormat="DVR" | "INHERIT" | "STANDARD"
name="table-name"
promote=TRUE | FALSE
replace=TRUE | FALSE
replication=integer
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"
threadBlockSize=64-bit-integer
timeStamp="string"
where={"string-1" <, "string-2", ...>}
},
copyVars="ALL" | "ALL_MODEL" | "ALL_NUMERIC" | {"variable-name-1" <, "variable-name-2", ...>},
into="string",
intoCutPt=double,
lcl="string",
pred="string",
resid="string",
role="string",
ucl="string"
},
outputMargins={
caslib="string",
compress=TRUE | FALSE,
indexVars={"variable-name-1" <, "variable-name-2", ...>},
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=TRUE | FALSE,
replace=TRUE | FALSE,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where={"string-1" <, "string-2", ...>}
},
outputTables={
groupByVarsRaw=TRUE | FALSE,
includeAll=TRUE | FALSE,
names={"string-1" <, "string-2", ...>} | {key-1={casouttable-1} <, key-2={casouttable-2}, ...>},
repeated=TRUE | FALSE,
replace=TRUE | FALSE
},
partByFrac={
seed=integer,
test=double
},
partByVar={
required parameter name="variable-name",
test="string",
train="string"
},
quantileBin=TRUE | FALSE,
sampleSummary={
avgNode="string",
required parameter casout={
caslib="string"
compress=TRUE | FALSE
indexVars={"variable-name-1" <, "variable-name-2", ...>}
label="string"
lifetime=64-bit-integer
maxMemSize=64-bit-integer
memoryFormat="DVR" | "INHERIT" | "STANDARD"
name="table-name"
promote=TRUE | FALSE
replace=TRUE | FALSE
replication=integer
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"
threadBlockSize=64-bit-integer
timeStamp="string"
where={"string-1" <, "string-2", ...>}
},
propAccepted="string",
sampSaved="string"
},
seed=64-bit-integer,
store={
caslib="string",
indexVars={"variable-name-1" <, "variable-name-2", ...>},
label="string",
lifetime=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=TRUE | FALSE,
replace=TRUE | FALSE,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"
},
required parameter table={
caslib="string",
computedOnDemand=TRUE | FALSE,
computedVars={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
computedVarsProgram="string",
dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},
groupBy={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
groupByMode="NOSORT" | "REDISTRIBUTE",
importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters},
required parameter name="table-name",
orderBy={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
singlePass=TRUE | FALSE,
vars={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
where="where-expression",
whereTable={
casLib="string"
dataSourceOptions={adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters}
importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters}
required parameter name="table-name"
vars={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}}
where="where-expression"
}
},
target="variable-name",
trainInMem=TRUE | FALSE,
treePrior={
depthBase=double,
depthPower=double,
pPrune=double,
pSplit=double
}
;
The label "required parameter" indicates a required parameter.

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables

Parameter

Subparameter

Description

required parameter table

specifies the input data table.

Parameters for Creating Output Tables

Parameter

Subparameter

Description

 output

required parameter casOut

creates a table on the server that contains observationwise statistics, which are computed after the model is fit.

 outputMargins

 outputTables

names

lists the names of results tables to save as CAS tables on the server.

 sampleSummary

required parameter casout

creates a table on the server that contains a summary of the sum-of-trees ensemble samples.

 store

stores the model in a binary table object that you can use for scoring.

Parameter Descriptions

alpha=double

specifies the significance level to use for constructing equal-tail credible limits for predictive margins.

Default 0.05
Range (0, 1)

applyRowOrder=TRUE | FALSE

Default FALSE

attributes={{casinvardesc-1} <, {casinvardesc-2}, ...>}

changes the attributes of variables used in the action. Currently, attributes specified on the inputs and nominals parameters are ignored.

For more information about specifying the attributes parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias attribute

class={{classStatement-1} <, {classStatement-2}, ...>}

names the classification variables to use as explanatory variables in the analysis.

Alias classVars

The classStatement value can be one or more of the following:

descending=TRUE | FALSE

when set to True, reverses the sort order that is imposed by the order parameter.

Default FALSE
order="FORMATTED" | "FREQ" | "FREQFORMATTED" | "FREQINTERNAL" | "INTERNAL"

specifies the sort order for the levels of the classification variable. This ordering determines which parameters in the model correspond to each level in the data.

ref="FIRST" | "LAST" | double | "string"

specifies the reference level to use when you specify a nonsingular parameterization in the param parameter. For an individual variable, you can specify the level of the variable to use as the reference level. If the action supports the global class options parameter, then you can specify FIRST or LAST.

* vars={"variable-name-1" <, "variable-name-2", ...>}

specifies the classification variables.

Alias name

differences={{bartScoreMargin_scoreDiff-1} <, {bartScoreMargin_scoreDiff-2}, ...>}

specifies differences of predictive margins.

Alias diffs

The bartScoreMargin_scoreDiff value can be one or more of the following:

* evtMargin="string"

specifies the event predictive margin by its name.

Alias evtScen
label="string"

labels the difference in predictive margins in output tables.

name="string"

names the difference in predictive margins in output tables.

* refMargin="string"

specifies the reference predictive margin by its name.

Alias refScen

display={displayTables}

specifies a list of results tables to send to the client for display.

For more information about specifying the display parameter, see the common displayTables parameter (Appendix A: Common Parameters).

distributeChains=integer

specifies a distributed mode that divides the MCMC sampling in a grid environment. This mode distributes the training data to workers so that the specified number of workers have a full copy of the training data and run a separate chain. This parameter is not applicable when you are in single-machine mode. When you specify a value of 0, a single chain is run, and each worker node is assigned a portion of the training data.

Minimum value 0

freq="variable-name"

names the numeric variable that contains the frequency of occurrence for each observation.

inputs={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the input variables to use in the analysis.

For more information about specifying the inputs parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias input

leafSigmaK=double

specifies the value used to determine the prior variance for the leaf parameter.

Default 2
Minimum value (exclusive) 0

margins={{bartScoreMargin_evaluate-1} <, {bartScoreMargin_evaluate-2}, ...>}

specifies a predictive margin.

Alias scenarios

The bartScoreMargin_evaluate value can be one or more of the following:

at={{bartScoreMargin_varValue-1} <, {bartScoreMargin_varValue-2}, ...>}

specifies the variables to modify in a predictive margin and the values they are set to.

Alias evaluate

The bartScoreMargin_varValue value can be one or more of the following:

* value="string" | double

specifies the value a variable is set to in the predictive margin. For continuous variables, a numeric value is specified. For classification variables, the formatted level is specified.

* var="string"

names a variable to modify in a predictive margin.

Alias variable
label="string"

labels the predictive margin in output tables.

* name="string"

names the predictive margin in output tables.

maxTrainTime=double

specifies an upper limit (in seconds) on the time for MCMC sampling.

Alias maxTime
Minimum value (exclusive) 0

minLeafSize=integer

specifies the minimum number of observations that each child of a split must contain in the training data in order for the split to be considered.

Alias leafSize
Default 5
Minimum value 1

missing="MACBIG" | "MACSMALL" | "NONE" | "SEPARATE"

specifies how to handle missing values in predictor variables.

Default SEPARATE
MACBIG

during the training phase, treats missing values for continuous predictors as the largest machine value and treats missing values for categorical predictors as a separate level. In the scoring phase, observations that have missing continuous predictor values are assigned to the right branch of the split, and observations that have an unknown categorical predictor level are assigned to the larger branch of the split.

MACSMALL

during the training phase, treats missing values for continuous predictors as the smallest machine value and treats missing values for categorical predictors as a separate level. In the scoring phase, observations that have missing continuous predictor values are assigned to the left branch of the split, and observations that have an unknown categorical predictor level are assigned to the larger branch of the split.

NONE

during the training phase, excludes all observations that have a missing predictor value. In the scoring phase, observations that have missing values or observations whose categorical predictor level is unknown are assigned to the larger branch of the split.

SEPARATE

during the training phase, treats missing values for continuous predictors as a separate group and treats missing values for categorical predictors as a separate level. In the training phase, when a split operation is sampled for a continuous predictor and there are observations that have a missing value of the splitting variable on the node, a primary rule for routing missing values is sampled before the primary splitting rule for nonmissing values is sampled. If a continuous predictor does not have a missing value on the node that you are splitting, a primary rule for routing missing values is not sampled. In the scoring phase, observations that have an unknown categorical predictor level or have a missing continuous predictor value for a node without a primary rule for routing missing values are assigned to the larger branch of the split.

model={bartProbitModel}

names the dependent variable and explanatory effects.

The bartProbitModel value can be one or more of the following:

depVars={{responsevar-1} <, {responsevar-2}, ...>}

specifies one or more variables to use as response variables in the model. Not all models support more than one response variable.

Aliases depVar
target
name="variable-name"

names the response variable.

effects={{effect-1} <, {effect-2}, ...>}

specifies a list of effects that define the model. Each term in this list is made up of variables specified in the vars parameter and their interaction (which can be NONE, CROSS, or BAR). When the interaction is BAR, it can be limited by the maxInteract parameter.

* vars={"string-1" <, "string-2", ...>}

specifies the variables to use in defining a term of the effect. You must specify at least one variable.

nBI=integer

specifies the number of burn-in iterations to perform before the action starts to save samples for prediction.

Alias burnin
Default 100
Minimum value 1

nBins=integer

specifies the number of bins to use for binning continuous input variables.

Default 50
Minimum value 2

nClassLevelsPrint=integer

limits the display of class levels. The value 0 suppresses all levels.

Minimum value 0

nMC=integer

specifies the number of MCMC iterations, excluding the burn-in iterations. This is the MCMC sample size if the thinning rate is 1. This option is ignored if you specify the nMCDist parameter and you run distributed chains.

Default 1000
Minimum value 1

nominals={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the nominal input variables to use in the analysis.

For more information about specifying the nominals parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias nominal

nThin=integer

specifies the thinning rate of the simulation.

Alias thin
Default 1
Minimum value 1

nTree=integer

specifies the number of trees in a sample of the sum-of-trees ensemble.

Default 200
Minimum value 1

obsLeafMapInMem=TRUE | FALSE

when set to True, stores a mapping of each observation to terminal nodes in memory when the model is trained.

Default FALSE

offset="variable-name"

specifies a numeric offset variable. This variable cannot be a classification variable, a response variable, or one of the explanatory variables.

orderSplit=integer

specifies the minimum cardinality for which a categorical input uses splitting rules according to level ordering.

Default 50
Minimum value (exclusive) 0

output={bartBinOutputStatement}

creates a table on the server that contains observationwise statistics, which are computed after the model is fit.

The bartBinOutputStatement value can be one or more of the following:

alpha=double

specifies the significance level to use for the construction of all equal-tail credible limits.

Default 0.05
Range (0, 1)
avgOnly=TRUE | FALSE

when set to FALSE, predictions from each MCMC sample are included in the output table in addition to the sample average predictions.

Alias averageOnly
Default TRUE
* casOut={casouttable}

specifies the settings for an output table.

For more information about specifying the casOut parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

copyVars="ALL" | "ALL_MODEL" | "ALL_NUMERIC" | {"variable-name-1" <, "variable-name-2", ...>}

specifies a list of one or more variables to be copied from the input table to the output table. You can alternatively specify the value ALL, ALL_MODEL, or ALL_NUMERIC, which respectively copies all variables, all variables used in the modeling, or all numeric variables from the input table to the output table.

into="string"

names the predicted response level. The default name is Into.

intoCutPt=double

specifies the predicted event probability that determines the predicted binary response level.

Default 0.5
Range (0, 1)
lcl="string"

names the equal-tail lower credible limit.

pred="string"

names the predicted value. If you do not specify any output statistics, then the predicted value is named Pred by default.

Aliases p
predicted
resid="string"

names the residual.

Aliases r
residual
role="string"

identifies the training and test roles for observations.

ucl="string"

names the equal-tail upper credible limit.

outputMargins={casouttable}

For more information about specifying the outputMargins parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

outputTables={outputTables}

lists the names of results tables to save as CAS tables on the server.

For more information about specifying the outputTables parameter, see the common outputTables parameter (Appendix A: Common Parameters).

Alias displayOut

partByFrac={partByFracStatement}

specifies the fraction of the data to be used for testing.

The partByFracStatement value can be one or more of the following:

seed=integer

specifies the seed to use in the random number generator that is used for partitioning the data.

Default 0
test=double

randomly assigns the specified proportion of observations in the input table to the testing role. The sum of the fractions that are specified in the test and validate parameters must be less than 1.

Range 0–1

partByVar={partByVarStatement}

names the variable and its values used to partition the data into training and testing roles.

Long form partByVar={name="variable-name"}
Shortcut form partByVar="variable-name"

The partByVarStatement value can be one or more of the following:

* name="variable-name"

names the variable in the input table whose values are used to assign roles to each observation.

test="string"

specifies the formatted value of the variable that is used to assign observations to the testing role.

train="string"

specifies the formatted value of the variable that is used to assign observations to the training role. If you do not specify the train parameter, then all observations whose roles are not determined by the test and validate parameters are assigned to training.

quantileBin=TRUE | FALSE

when set to True, specifies that bin boundaries are set at quantiles of numeric inputs instead of bins of equal width.

Aliases qbin
qtbin
Default TRUE

sampleSummary={bartProbit_sampleSummary}

creates a table on the server that contains a summary of the sum-of-trees ensemble samples.

The bartProbit_sampleSummary value can be one or more of the following:

avgNode="string"

names the variable that contains the average number of nodes per tree in the sample.

* casout={casouttable}

creates a table on the server that contains a summary of the sum-of-trees ensemble samples.

For more information about specifying the casout parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

propAccepted="string"

names the variable that contains the proportion of accepted tree modifications.

sampSaved="string"

names the variable that contains an indicator for whether the sample is saved for prediction.

seed=64-bit-integer

specifies a seed for starting the pseudorandom number generator.

Default 0
Range 0–4294967295

store={casouttablebasic}

stores the model in a binary table object that you can use for scoring.

For more information about specifying the store parameter, see the common casouttablebasic parameter (Appendix A: Common Parameters).

Aliases savemodel
save
savestate

* table={castable}

specifies the input data table.

For more information about specifying the table parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

target="variable-name"

specifies the target variable.

trainInMem=TRUE | FALSE

when set to True, stores data in memory when the model is trained.

Default FALSE

treePrior={bart_treePrior}

specifies the regularization prior for the sum-of-trees ensemble.

The bart_treePrior value can be one or more of the following:

depthBase=double

specifies the base probability for splitting an internal node as a function of its depth from the root. A larger base probability value makes splitting a node more likely.

Default 0.95
Range (0, 1)
depthPower=double

specifies the power parameter used to compute the probability of splitting an internal node as a function of its depth from the root. A larger depth power value decreases the probability of splitting a node.

Default 2
Minimum value 0
pPrune=double

specifies the probability of sampling the operation of pruning a pair of terminal nodes for the tree sampling algorithm. If you specify the pSplit and pPrune parameters, their values must sum to 1.

Default 0.5
Range (0, 1)
pSplit=double

specifies the probability of sampling the operation of splitting a terminal node for the tree sampling algorithm. If you specify the pSplit and pPrune parameters, their values must sum to 1.

Default 0.5
Range (0, 1)

bartProbit Action

Fits probit Bayesian additive regression trees (BART) models to binary distributed response data.

Lua Syntax

results, info = s:bart_bartProbit{
alpha=double,
applyRowOrder=true | false,
attributes={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
class={{
descending=true | false,
order="FORMATTED" | "FREQ" | "FREQFORMATTED" | "FREQINTERNAL" | "INTERNAL",
ref="FIRST" | "LAST" | double | "string",
required parameter vars={"variable-name-1" <, "variable-name-2", ...>}
}, {...}},
differences={{
required parameter evtMargin="string",
label="string",
name="string",
required parameter refMargin="string"
}, {...}},
display={
caseSensitive=true | false,
exclude=true | false,
excludeAll=true | false,
keyIsPath=true | false,
names={"string-1" <, "string-2", ...>},
pathType="LABEL" | "NAME",
traceNames=true | false
},
freq="variable-name",
inputs={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
leafSigmaK=double,
margins={{
at={{
required parameter value="string" | double,
required parameter var="string"
}, {...}},
label="string",
required parameter name="string"
}, {...}},
maxTrainTime=double,
minLeafSize=integer,
model={
depVars={{
name="variable-name"
}, {...}},
effects={{
required parameter vars={"string-1" <, "string-2", ...>}
}, {...}}
},
nBI=integer,
nBins=integer,
nMC=integer,
nominals={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
nThin=integer,
nTree=integer,
obsLeafMapInMem=true | false,
offset="variable-name",
orderSplit=integer,
output={
alpha=double,
avgOnly=true | false,
required parameter casOut={
caslib="string"
compress=true | false
indexVars={"variable-name-1" <, "variable-name-2", ...>}
label="string"
lifetime=64-bit-integer
maxMemSize=64-bit-integer
memoryFormat="DVR" | "INHERIT" | "STANDARD"
name="table-name"
promote=true | false
replace=true | false
replication=integer
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"
threadBlockSize=64-bit-integer
timeStamp="string"
where={"string-1" <, "string-2", ...>}
},
copyVars="ALL" | "ALL_MODEL" | "ALL_NUMERIC" | {"variable-name-1" <, "variable-name-2", ...>},
into="string",
intoCutPt=double,
lcl="string",
pred="string",
resid="string",
role="string",
ucl="string"
},
outputMargins={
caslib="string",
compress=true | false,
indexVars={"variable-name-1" <, "variable-name-2", ...>},
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=true | false,
replace=true | false,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where={"string-1" <, "string-2", ...>}
},
outputTables={
groupByVarsRaw=true | false,
includeAll=true | false,
names={"string-1" <, "string-2", ...>} | {key-1={casouttable-1} <, key-2={casouttable-2}, ...>},
repeated=true | false,
replace=true | false
},
partByFrac={
seed=integer,
test=double
},
partByVar={
required parameter name="variable-name",
test="string",
train="string"
},
quantileBin=true | false,
sampleSummary={
avgNode="string",
required parameter casout={
caslib="string"
compress=true | false
indexVars={"variable-name-1" <, "variable-name-2", ...>}
label="string"
lifetime=64-bit-integer
maxMemSize=64-bit-integer
memoryFormat="DVR" | "INHERIT" | "STANDARD"
name="table-name"
promote=true | false
replace=true | false
replication=integer
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"
threadBlockSize=64-bit-integer
timeStamp="string"
where={"string-1" <, "string-2", ...>}
},
propAccepted="string",
sampSaved="string"
},
seed=64-bit-integer,
store={
caslib="string",
indexVars={"variable-name-1" <, "variable-name-2", ...>},
label="string",
lifetime=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=true | false,
replace=true | false,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"
},
required parameter table={
caslib="string",
computedOnDemand=true | false,
computedVars={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
computedVarsProgram="string",
dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},
groupBy={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
groupByMode="NOSORT" | "REDISTRIBUTE",
importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters},
required parameter name="table-name",
orderBy={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
singlePass=true | false,
vars={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}},
where="where-expression",
whereTable={
casLib="string"
dataSourceOptions={adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters}
importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters}
required parameter name="table-name"
vars={{
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
}, {...}}
where="where-expression"
}
},
target="variable-name",
trainInMem=true | false,
treePrior={
depthBase=double,
depthPower=double,
pPrune=double,
pSplit=double
}
}
The label "required parameter" indicates a required parameter.

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables

Parameter

Subparameter

Description

required parameter table

specifies the input data table.

Parameters for Creating Output Tables

Parameter

Subparameter

Description

 output

required parameter casOut

creates a table on the server that contains observationwise statistics, which are computed after the model is fit.

 outputMargins

 outputTables

names

lists the names of results tables to save as CAS tables on the server.

 sampleSummary

required parameter casout

creates a table on the server that contains a summary of the sum-of-trees ensemble samples.

 store

stores the model in a binary table object that you can use for scoring.

Parameter Descriptions

alpha=double

specifies the significance level to use for constructing equal-tail credible limits for predictive margins.

Default 0.05
Range (0, 1)

applyRowOrder=true | false

Default false

attributes={{casinvardesc-1} <, {casinvardesc-2}, ...>}

changes the attributes of variables used in the action. Currently, attributes specified on the inputs and nominals parameters are ignored.

For more information about specifying the attributes parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias attribute

class={{classStatement-1} <, {classStatement-2}, ...>}

names the classification variables to use as explanatory variables in the analysis.

Alias classVars

The classStatement value can be one or more of the following:

descending=true | false

when set to True, reverses the sort order that is imposed by the order parameter.

Default false
order="FORMATTED" | "FREQ" | "FREQFORMATTED" | "FREQINTERNAL" | "INTERNAL"

specifies the sort order for the levels of the classification variable. This ordering determines which parameters in the model correspond to each level in the data.

ref="FIRST" | "LAST" | double | "string"

specifies the reference level to use when you specify a nonsingular parameterization in the param parameter. For an individual variable, you can specify the level of the variable to use as the reference level. If the action supports the global class options parameter, then you can specify FIRST or LAST.

* vars={"variable-name-1" <, "variable-name-2", ...>}

specifies the classification variables.

Alias name

differences={{bartScoreMargin_scoreDiff-1} <, {bartScoreMargin_scoreDiff-2}, ...>}

specifies differences of predictive margins.

Alias diffs

The bartScoreMargin_scoreDiff value can be one or more of the following:

* evtMargin="string"

specifies the event predictive margin by its name.

Alias evtScen
label="string"

labels the difference in predictive margins in output tables.

name="string"

names the difference in predictive margins in output tables.

* refMargin="string"

specifies the reference predictive margin by its name.

Alias refScen

display={displayTables}

specifies a list of results tables to send to the client for display.

For more information about specifying the display parameter, see the common displayTables parameter (Appendix A: Common Parameters).

distributeChains=integer

specifies a distributed mode that divides the MCMC sampling in a grid environment. This mode distributes the training data to workers so that the specified number of workers have a full copy of the training data and run a separate chain. This parameter is not applicable when you are in single-machine mode. When you specify a value of 0, a single chain is run, and each worker node is assigned a portion of the training data.

Minimum value 0

freq="variable-name"

names the numeric variable that contains the frequency of occurrence for each observation.

inputs={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the input variables to use in the analysis.

For more information about specifying the inputs parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias input

leafSigmaK=double

specifies the value used to determine the prior variance for the leaf parameter.

Default 2
Minimum value (exclusive) 0

margins={{bartScoreMargin_evaluate-1} <, {bartScoreMargin_evaluate-2}, ...>}

specifies a predictive margin.

Alias scenarios

The bartScoreMargin_evaluate value can be one or more of the following:

at={{bartScoreMargin_varValue-1} <, {bartScoreMargin_varValue-2}, ...>}

specifies the variables to modify in a predictive margin and the values they are set to.

Alias evaluate

The bartScoreMargin_varValue value can be one or more of the following:

* value="string" | double

specifies the value a variable is set to in the predictive margin. For continuous variables, a numeric value is specified. For classification variables, the formatted level is specified.

* var="string"

names a variable to modify in a predictive margin.

Alias variable
label="string"

labels the predictive margin in output tables.

* name="string"

names the predictive margin in output tables.

maxTrainTime=double

specifies an upper limit (in seconds) on the time for MCMC sampling.

Alias maxTime
Minimum value (exclusive) 0

minLeafSize=integer

specifies the minimum number of observations that each child of a split must contain in the training data in order for the split to be considered.

Alias leafSize
Default 5
Minimum value 1

missing="MACBIG" | "MACSMALL" | "NONE" | "SEPARATE"

specifies how to handle missing values in predictor variables.

Default SEPARATE
MACBIG

during the training phase, treats missing values for continuous predictors as the largest machine value and treats missing values for categorical predictors as a separate level. In the scoring phase, observations that have missing continuous predictor values are assigned to the right branch of the split, and observations that have an unknown categorical predictor level are assigned to the larger branch of the split.

MACSMALL

during the training phase, treats missing values for continuous predictors as the smallest machine value and treats missing values for categorical predictors as a separate level. In the scoring phase, observations that have missing continuous predictor values are assigned to the left branch of the split, and observations that have an unknown categorical predictor level are assigned to the larger branch of the split.

NONE

during the training phase, excludes all observations that have a missing predictor value. In the scoring phase, observations that have missing values or observations whose categorical predictor level is unknown are assigned to the larger branch of the split.

SEPARATE

during the training phase, treats missing values for continuous predictors as a separate group and treats missing values for categorical predictors as a separate level. In the training phase, when a split operation is sampled for a continuous predictor and there are observations that have a missing value of the splitting variable on the node, a primary rule for routing missing values is sampled before the primary splitting rule for nonmissing values is sampled. If a continuous predictor does not have a missing value on the node that you are splitting, a primary rule for routing missing values is not sampled. In the scoring phase, observations that have an unknown categorical predictor level or have a missing continuous predictor value for a node without a primary rule for routing missing values are assigned to the larger branch of the split.

model={bartProbitModel}

names the dependent variable and explanatory effects.

The bartProbitModel value can be one or more of the following:

depVars={{responsevar-1} <, {responsevar-2}, ...>}

specifies one or more variables to use as response variables in the model. Not all models support more than one response variable.

Aliases depVar
target
name="variable-name"

names the response variable.

effects={{effect-1} <, {effect-2}, ...>}

specifies a list of effects that define the model. Each term in this list is made up of variables specified in the vars parameter and their interaction (which can be NONE, CROSS, or BAR). When the interaction is BAR, it can be limited by the maxInteract parameter.

* vars={"string-1" <, "string-2", ...>}

specifies the variables to use in defining a term of the effect. You must specify at least one variable.

nBI=integer

specifies the number of burn-in iterations to perform before the action starts to save samples for prediction.

Alias burnin
Default 100
Minimum value 1

nBins=integer

specifies the number of bins to use for binning continuous input variables.

Default 50
Minimum value 2

nClassLevelsPrint=integer

limits the display of class levels. The value 0 suppresses all levels.

Minimum value 0

nMC=integer

specifies the number of MCMC iterations, excluding the burn-in iterations. This is the MCMC sample size if the thinning rate is 1. This option is ignored if you specify the nMCDist parameter and you run distributed chains.

Default 1000
Minimum value 1

nominals={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the nominal input variables to use in the analysis.

For more information about specifying the nominals parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias nominal

nThin=integer

specifies the thinning rate of the simulation.

Alias thin
Default 1
Minimum value 1

nTree=integer

specifies the number of trees in a sample of the sum-of-trees ensemble.

Default 200
Minimum value 1

obsLeafMapInMem=true | false

when set to True, stores a mapping of each observation to terminal nodes in memory when the model is trained.

Default false

offset="variable-name"

specifies a numeric offset variable. This variable cannot be a classification variable, a response variable, or one of the explanatory variables.

orderSplit=integer

specifies the minimum cardinality for which a categorical input uses splitting rules according to level ordering.

Default 50
Minimum value (exclusive) 0

output={bartBinOutputStatement}

creates a table on the server that contains observationwise statistics, which are computed after the model is fit.

The bartBinOutputStatement value can be one or more of the following:

alpha=double

specifies the significance level to use for the construction of all equal-tail credible limits.

Default 0.05
Range (0, 1)
avgOnly=true | false

when set to False, predictions from each MCMC sample are included in the output table in addition to the sample average predictions.

Alias averageOnly
Default true
* casOut={casouttable}

specifies the settings for an output table.

For more information about specifying the casOut parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

copyVars="ALL" | "ALL_MODEL" | "ALL_NUMERIC" | {"variable-name-1" <, "variable-name-2", ...>}

specifies a list of one or more variables to be copied from the input table to the output table. You can alternatively specify the value ALL, ALL_MODEL, or ALL_NUMERIC, which respectively copies all variables, all variables used in the modeling, or all numeric variables from the input table to the output table.

into="string"

names the predicted response level. The default name is Into.

intoCutPt=double

specifies the predicted event probability that determines the predicted binary response level.

Default 0.5
Range (0, 1)
lcl="string"

names the equal-tail lower credible limit.

pred="string"

names the predicted value. If you do not specify any output statistics, then the predicted value is named Pred by default.

Aliases p
predicted
resid="string"

names the residual.

Aliases r
residual
role="string"

identifies the training and test roles for observations.

ucl="string"

names the equal-tail upper credible limit.

outputMargins={casouttable}

For more information about specifying the outputMargins parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

outputTables={outputTables}

lists the names of results tables to save as CAS tables on the server.

For more information about specifying the outputTables parameter, see the common outputTables parameter (Appendix A: Common Parameters).

Alias displayOut

partByFrac={partByFracStatement}

specifies the fraction of the data to be used for testing.

The partByFracStatement value can be one or more of the following:

seed=integer

specifies the seed to use in the random number generator that is used for partitioning the data.

Default 0
test=double

randomly assigns the specified proportion of observations in the input table to the testing role. The sum of the fractions that are specified in the test and validate parameters must be less than 1.

Range 0–1

partByVar={partByVarStatement}

names the variable and its values used to partition the data into training and testing roles.

Long form partByVar={name="variable-name"}
Shortcut form partByVar="variable-name"

The partByVarStatement value can be one or more of the following:

* name="variable-name"

names the variable in the input table whose values are used to assign roles to each observation.

test="string"

specifies the formatted value of the variable that is used to assign observations to the testing role.

train="string"

specifies the formatted value of the variable that is used to assign observations to the training role. If you do not specify the train parameter, then all observations whose roles are not determined by the test and validate parameters are assigned to training.

quantileBin=true | false

when set to True, specifies that bin boundaries are set at quantiles of numeric inputs instead of bins of equal width.

Aliases qbin
qtbin
Default true

sampleSummary={bartProbit_sampleSummary}

creates a table on the server that contains a summary of the sum-of-trees ensemble samples.

The bartProbit_sampleSummary value can be one or more of the following:

avgNode="string"

names the variable that contains the average number of nodes per tree in the sample.

* casout={casouttable}

creates a table on the server that contains a summary of the sum-of-trees ensemble samples.

For more information about specifying the casout parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

propAccepted="string"

names the variable that contains the proportion of accepted tree modifications.

sampSaved="string"

names the variable that contains an indicator for whether the sample is saved for prediction.

seed=64-bit-integer

specifies a seed for starting the pseudorandom number generator.

Default 0
Range 0–4294967295

store={casouttablebasic}

stores the model in a binary table object that you can use for scoring.

For more information about specifying the store parameter, see the common casouttablebasic parameter (Appendix A: Common Parameters).

Aliases savemodel
save
savestate

* table={castable}

specifies the input data table.

For more information about specifying the table parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

target="variable-name"

specifies the target variable.

trainInMem=true | false

when set to True, stores data in memory when the model is trained.

Default false

treePrior={bart_treePrior}

specifies the regularization prior for the sum-of-trees ensemble.

The bart_treePrior value can be one or more of the following:

depthBase=double

specifies the base probability for splitting an internal node as a function of its depth from the root. A larger base probability value makes splitting a node more likely.

Default 0.95
Range (0, 1)
depthPower=double

specifies the power parameter used to compute the probability of splitting an internal node as a function of its depth from the root. A larger depth power value decreases the probability of splitting a node.

Default 2
Minimum value 0
pPrune=double

specifies the probability of sampling the operation of pruning a pair of terminal nodes for the tree sampling algorithm. If you specify the pSplit and pPrune parameters, their values must sum to 1.

Default 0.5
Range (0, 1)
pSplit=double

specifies the probability of sampling the operation of splitting a terminal node for the tree sampling algorithm. If you specify the pSplit and pPrune parameters, their values must sum to 1.

Default 0.5
Range (0, 1)

bartProbit Action

Fits probit Bayesian additive regression trees (BART) models to binary distributed response data.

Python Syntax

results=s.bart.bartProbit(
alpha=double,
applyRowOrder=True | False,
attributes=[{
"format":"string",
"formattedLength":integer,
"label":"string",
required parameter "name":"variable-name",
"nfd":integer,
"nfl":integer
}<, {...}>],
class_=[{
"descending":True | False,
"order":"FORMATTED" | "FREQ" | "FREQFORMATTED" | "FREQINTERNAL" | "INTERNAL",
"ref":"FIRST" | "LAST" | double | "string",
required parameter "vars":["variable-name-1" <, "variable-name-2", ...>]
}<, {...}>],
differences=[{
required parameter "evtMargin":"string",
"label":"string",
"name":"string",
required parameter "refMargin":"string"
}<, {...}>],
display={
"caseSensitive":True | False,
"exclude":True | False,
"excludeAll":True | False,
"keyIsPath":True | False,
"names":["string-1" <, "string-2", ...>],
"pathType":"LABEL" | "NAME",
"traceNames":True | False
},
freq="variable-name",
inputs=[{
"format":"string",
"formattedLength":integer,
"label":"string",
required parameter "name":"variable-name",
"nfd":integer,
"nfl":integer
}<, {...}>],
leafSigmaK=double,
margins=[{
"at":[{
required parameter "value":"string" | double,
required parameter "var":"string"
}<, {...}>],
"label":"string",
required parameter "name":"string"
}<, {...}>],
maxTrainTime=double,
minLeafSize=integer,
model={
"depVars":[{
"name":"variable-name"
}<, {...}>],
"effects":[{
required parameter "vars":["string-1" <, "string-2", ...>]
}<, {...}>]
},
nBI=integer,
nBins=integer,
nMC=integer,
nominals=[{
"format":"string",
"formattedLength":integer,
"label":"string",
required parameter "name":"variable-name",
"nfd":integer,
"nfl":integer
}<, {...}>],
nThin=integer,
nTree=integer,
obsLeafMapInMem=True | False,
offset="variable-name",
orderSplit=integer,
output={
"alpha":double,
"avgOnly":True | False,
required parameter "casOut":{
"caslib":"string",
"compress":True | False,
"indexVars":["variable-name-1" <, "variable-name-2", ...>],
"label":"string",
"lifetime":64-bit-integer,
"maxMemSize":64-bit-integer,
"memoryFormat":"DVR" | "INHERIT" | "STANDARD",
"name":"table-name",
"promote":True | False,
"replace":True | False,
"replication":integer,
"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",
"threadBlockSize":64-bit-integer,
"timeStamp":"string",
"where":["string-1" <, "string-2", ...>]
},
"copyVars":"ALL" | "ALL_MODEL" | "ALL_NUMERIC" | ["variable-name-1" <, "variable-name-2", ...>],
"into":"string",
"intoCutPt":double,
"lcl":"string",
"pred":"string",
"resid":"string",
"role":"string",
"ucl":"string"
},
outputMargins={
"caslib":"string",
"compress":True | False,
"indexVars":["variable-name-1" <, "variable-name-2", ...>],
"label":"string",
"lifetime":64-bit-integer,
"maxMemSize":64-bit-integer,
"memoryFormat":"DVR" | "INHERIT" | "STANDARD",
"name":"table-name",
"promote":True | False,
"replace":True | False,
"replication":integer,
"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",
"threadBlockSize":64-bit-integer,
"timeStamp":"string",
"where":["string-1" <, "string-2", ...>]
},
outputTables={
"groupByVarsRaw":True | False,
"includeAll":True | False,
"names":["string-1" <, "string-2", ...>] | {"key-1":{casouttable-1} <, "key-2":{casouttable-2}, ...>},
"repeated":True | False,
"replace":True | False
},
partByFrac={
"seed":integer,
"test":double
},
partByVar={
required parameter "name":"variable-name",
"test":"string",
"train":"string"
},
quantileBin=True | False,
sampleSummary={
"avgNode":"string",
required parameter "casout":{
"caslib":"string",
"compress":True | False,
"indexVars":["variable-name-1" <, "variable-name-2", ...>],
"label":"string",
"lifetime":64-bit-integer,
"maxMemSize":64-bit-integer,
"memoryFormat":"DVR" | "INHERIT" | "STANDARD",
"name":"table-name",
"promote":True | False,
"replace":True | False,
"replication":integer,
"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",
"threadBlockSize":64-bit-integer,
"timeStamp":"string",
"where":["string-1" <, "string-2", ...>]
},
"propAccepted":"string",
"sampSaved":"string"
},
seed=64-bit-integer,
store={
"caslib":"string",
"indexVars":["variable-name-1" <, "variable-name-2", ...>],
"label":"string",
"lifetime":64-bit-integer,
"memoryFormat":"DVR" | "INHERIT" | "STANDARD",
"name":"table-name",
"promote":True | False,
"replace":True | False,
"replication":integer,
"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE"
},
required parameter table={
"caslib":"string",
"computedOnDemand":True | False,
"computedVars":[{
"format":"string",
"formattedLength":integer,
"label":"string",
required parameter "name":"variable-name",
"nfd":integer,
"nfl":integer
}<, {...}>],
"computedVarsProgram":"string",
"dataSourceOptions":{"key-1":{any-list-or-data-type-1} <, "key-2":{any-list-or-data-type-2}, ...>},
"groupBy":[{
"format":"string",
"formattedLength":integer,
"label":"string",
required parameter "name":"variable-name",
"nfd":integer,
"nfl":integer
}<, {...}>],
"groupByMode":"NOSORT" | "REDISTRIBUTE",
"importOptions":{"fileType":"ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters},
required parameter "name":"table-name",
"orderBy":[{
"format":"string",
"formattedLength":integer,
"label":"string",
required parameter "name":"variable-name",
"nfd":integer,
"nfl":integer
}<, {...}>],
"singlePass":True | False,
"vars":[{
"format":"string",
"formattedLength":integer,
"label":"string",
required parameter "name":"variable-name",
"nfd":integer,
"nfl":integer
}<, {...}>],
"where":"where-expression",
"whereTable":{
"casLib":"string",
"dataSourceOptions":{adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters},
"importOptions":{"fileType":"ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters},
required parameter "name":"table-name",
"vars":[{
"format":"string",
"formattedLength":integer,
"label":"string",
required parameter "name":"variable-name",
"nfd":integer,
"nfl":integer
}<, {...}>],
"where":"where-expression"
}
},
target="variable-name",
trainInMem=True | False,
treePrior={
"depthBase":double,
"depthPower":double,
"pPrune":double,
"pSplit":double
}
)
indicates a required parameter

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables

Parameter

Subparameter

Description

required parameter table

specifies the input data table.

Parameters for Creating Output Tables

Parameter

Subparameter

Description

 output

required parameter casOut

creates a table on the server that contains observationwise statistics, which are computed after the model is fit.

 outputMargins

 outputTables

names

lists the names of results tables to save as CAS tables on the server.

 sampleSummary

required parameter casout

creates a table on the server that contains a summary of the sum-of-trees ensemble samples.

 store

stores the model in a binary table object that you can use for scoring.

Parameter Descriptions

alpha=double

specifies the significance level to use for constructing equal-tail credible limits for predictive margins.

Default 0.05
Range (0, 1)

applyRowOrder=True | False

Default False

attributes=[{casinvardesc-1} <, {casinvardesc-2}, ...>]

changes the attributes of variables used in the action. Currently, attributes specified on the inputs and nominals parameters are ignored.

For more information about specifying the attributes parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias attribute

class_=[{classStatement-1} <, {classStatement-2}, ...>]

names the classification variables to use as explanatory variables in the analysis.

Alias classVars

The classStatement value can be one or more of the following:

"descending":True | False

when set to True, reverses the sort order that is imposed by the order parameter.

Default False
"order":"FORMATTED" | "FREQ" | "FREQFORMATTED" | "FREQINTERNAL" | "INTERNAL"

specifies the sort order for the levels of the classification variable. This ordering determines which parameters in the model correspond to each level in the data.

"ref":"FIRST" | "LAST" | double | "string"

specifies the reference level to use when you specify a nonsingular parameterization in the param parameter. For an individual variable, you can specify the level of the variable to use as the reference level. If the action supports the global class options parameter, then you can specify FIRST or LAST.

* "vars":["variable-name-1" <, "variable-name-2", ...>]

specifies the classification variables.

Alias name

differences=[{bartScoreMargin_scoreDiff-1} <, {bartScoreMargin_scoreDiff-2}, ...>]

specifies differences of predictive margins.

Alias diffs

The bartScoreMargin_scoreDiff value can be one or more of the following:

* "evtMargin":"string"

specifies the event predictive margin by its name.

Alias evtScen
"label":"string"

labels the difference in predictive margins in output tables.

"name":"string"

names the difference in predictive margins in output tables.

* "refMargin":"string"

specifies the reference predictive margin by its name.

Alias refScen

display={displayTables}

specifies a list of results tables to send to the client for display.

For more information about specifying the display parameter, see the common displayTables parameter (Appendix A: Common Parameters).

distributeChains=integer

specifies a distributed mode that divides the MCMC sampling in a grid environment. This mode distributes the training data to workers so that the specified number of workers have a full copy of the training data and run a separate chain. This parameter is not applicable when you are in single-machine mode. When you specify a value of 0, a single chain is run, and each worker node is assigned a portion of the training data.

Minimum value 0

freq="variable-name"

names the numeric variable that contains the frequency of occurrence for each observation.

inputs=[{casinvardesc-1} <, {casinvardesc-2}, ...>]

specifies the input variables to use in the analysis.

For more information about specifying the inputs parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias input

leafSigmaK=double

specifies the value used to determine the prior variance for the leaf parameter.

Default 2
Minimum value (exclusive) 0

margins=[{bartScoreMargin_evaluate-1} <, {bartScoreMargin_evaluate-2}, ...>]

specifies a predictive margin.

Alias scenarios

The bartScoreMargin_evaluate value can be one or more of the following:

"at":[{bartScoreMargin_varValue-1} <, {bartScoreMargin_varValue-2}, ...>]

specifies the variables to modify in a predictive margin and the values they are set to.

Alias evaluate

The bartScoreMargin_varValue value can be one or more of the following:

* "value":"string" | double

specifies the value a variable is set to in the predictive margin. For continuous variables, a numeric value is specified. For classification variables, the formatted level is specified.

* "var":"string"

names a variable to modify in a predictive margin.

Alias variable
"label":"string"

labels the predictive margin in output tables.

* "name":"string"

names the predictive margin in output tables.

maxTrainTime=double

specifies an upper limit (in seconds) on the time for MCMC sampling.

Alias maxTime
Minimum value (exclusive) 0

minLeafSize=integer

specifies the minimum number of observations that each child of a split must contain in the training data in order for the split to be considered.

Alias leafSize
Default 5
Minimum value 1

missing="MACBIG" | "MACSMALL" | "NONE" | "SEPARATE"

specifies how to handle missing values in predictor variables.

Default SEPARATE
MACBIG

during the training phase, treats missing values for continuous predictors as the largest machine value and treats missing values for categorical predictors as a separate level. In the scoring phase, observations that have missing continuous predictor values are assigned to the right branch of the split, and observations that have an unknown categorical predictor level are assigned to the larger branch of the split.

MACSMALL

during the training phase, treats missing values for continuous predictors as the smallest machine value and treats missing values for categorical predictors as a separate level. In the scoring phase, observations that have missing continuous predictor values are assigned to the left branch of the split, and observations that have an unknown categorical predictor level are assigned to the larger branch of the split.

NONE

during the training phase, excludes all observations that have a missing predictor value. In the scoring phase, observations that have missing values or observations whose categorical predictor level is unknown are assigned to the larger branch of the split.

SEPARATE

during the training phase, treats missing values for continuous predictors as a separate group and treats missing values for categorical predictors as a separate level. In the training phase, when a split operation is sampled for a continuous predictor and there are observations that have a missing value of the splitting variable on the node, a primary rule for routing missing values is sampled before the primary splitting rule for nonmissing values is sampled. If a continuous predictor does not have a missing value on the node that you are splitting, a primary rule for routing missing values is not sampled. In the scoring phase, observations that have an unknown categorical predictor level or have a missing continuous predictor value for a node without a primary rule for routing missing values are assigned to the larger branch of the split.

model={bartProbitModel}

names the dependent variable and explanatory effects.

The bartProbitModel value can be one or more of the following:

"depVars":[{responsevar-1} <, {responsevar-2}, ...>]

specifies one or more variables to use as response variables in the model. Not all models support more than one response variable.

Aliases depVar
target
"name":"variable-name"

names the response variable.

"effects":[{effect-1} <, {effect-2}, ...>]

specifies a list of effects that define the model. Each term in this list is made up of variables specified in the vars parameter and their interaction (which can be NONE, CROSS, or BAR). When the interaction is BAR, it can be limited by the maxInteract parameter.

* "vars":["string-1" <, "string-2", ...>]

specifies the variables to use in defining a term of the effect. You must specify at least one variable.

nBI=integer

specifies the number of burn-in iterations to perform before the action starts to save samples for prediction.

Alias burnin
Default 100
Minimum value 1

nBins=integer

specifies the number of bins to use for binning continuous input variables.

Default 50
Minimum value 2

nClassLevelsPrint=integer

limits the display of class levels. The value 0 suppresses all levels.

Minimum value 0

nMC=integer

specifies the number of MCMC iterations, excluding the burn-in iterations. This is the MCMC sample size if the thinning rate is 1. This option is ignored if you specify the nMCDist parameter and you run distributed chains.

Default 1000
Minimum value 1

nominals=[{casinvardesc-1} <, {casinvardesc-2}, ...>]

specifies the nominal input variables to use in the analysis.

For more information about specifying the nominals parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias nominal

nThin=integer

specifies the thinning rate of the simulation.

Alias thin
Default 1
Minimum value 1

nTree=integer

specifies the number of trees in a sample of the sum-of-trees ensemble.

Default 200
Minimum value 1

obsLeafMapInMem=True | False

when set to True, stores a mapping of each observation to terminal nodes in memory when the model is trained.

Default False

offset="variable-name"

specifies a numeric offset variable. This variable cannot be a classification variable, a response variable, or one of the explanatory variables.

orderSplit=integer

specifies the minimum cardinality for which a categorical input uses splitting rules according to level ordering.

Default 50
Minimum value (exclusive) 0

output={bartBinOutputStatement}

creates a table on the server that contains observationwise statistics, which are computed after the model is fit.

The bartBinOutputStatement value can be one or more of the following:

"alpha":double

specifies the significance level to use for the construction of all equal-tail credible limits.

Default 0.05
Range (0, 1)
"avgOnly":True | False

when set to False, predictions from each MCMC sample are included in the output table in addition to the sample average predictions.

Alias averageOnly
Default True
* "casOut":{casouttable}

specifies the settings for an output table.

For more information about specifying the casOut parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

"copyVars":"ALL" | "ALL_MODEL" | "ALL_NUMERIC" | ["variable-name-1" <, "variable-name-2", ...>]

specifies a list of one or more variables to be copied from the input table to the output table. You can alternatively specify the value ALL, ALL_MODEL, or ALL_NUMERIC, which respectively copies all variables, all variables used in the modeling, or all numeric variables from the input table to the output table.

"into":"string"

names the predicted response level. The default name is Into.

"intoCutPt":double

specifies the predicted event probability that determines the predicted binary response level.

Default 0.5
Range (0, 1)
"lcl":"string"

names the equal-tail lower credible limit.

"pred":"string"

names the predicted value. If you do not specify any output statistics, then the predicted value is named Pred by default.

Aliases p
predicted
"resid":"string"

names the residual.

Aliases r
residual
"role":"string"

identifies the training and test roles for observations.

"ucl":"string"

names the equal-tail upper credible limit.

outputMargins={casouttable}

For more information about specifying the outputMargins parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

outputTables={outputTables}

lists the names of results tables to save as CAS tables on the server.

For more information about specifying the outputTables parameter, see the common outputTables parameter (Appendix A: Common Parameters).

Alias displayOut

partByFrac={partByFracStatement}

specifies the fraction of the data to be used for testing.

The partByFracStatement value can be one or more of the following:

"seed":integer

specifies the seed to use in the random number generator that is used for partitioning the data.

Default 0
"test":double

randomly assigns the specified proportion of observations in the input table to the testing role. The sum of the fractions that are specified in the test and validate parameters must be less than 1.

Range 0–1

partByVar={partByVarStatement}

names the variable and its values used to partition the data into training and testing roles.

Long form partByVar={"name":"variable-name"}
Shortcut form partByVar="variable-name"

The partByVarStatement value can be one or more of the following:

* "name":"variable-name"

names the variable in the input table whose values are used to assign roles to each observation.

"test":"string"

specifies the formatted value of the variable that is used to assign observations to the testing role.

"train":"string"

specifies the formatted value of the variable that is used to assign observations to the training role. If you do not specify the train parameter, then all observations whose roles are not determined by the test and validate parameters are assigned to training.

quantileBin=True | False

when set to True, specifies that bin boundaries are set at quantiles of numeric inputs instead of bins of equal width.

Aliases qbin
qtbin
Default True

sampleSummary={bartProbit_sampleSummary}

creates a table on the server that contains a summary of the sum-of-trees ensemble samples.

The bartProbit_sampleSummary value can be one or more of the following:

"avgNode":"string"

names the variable that contains the average number of nodes per tree in the sample.

* "casout":{casouttable}

creates a table on the server that contains a summary of the sum-of-trees ensemble samples.

For more information about specifying the casout parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

"propAccepted":"string"

names the variable that contains the proportion of accepted tree modifications.

"sampSaved":"string"

names the variable that contains an indicator for whether the sample is saved for prediction.

seed=64-bit-integer

specifies a seed for starting the pseudorandom number generator.

Default 0
Range 0–4294967295

store={casouttablebasic}

stores the model in a binary table object that you can use for scoring.

For more information about specifying the store parameter, see the common casouttablebasic parameter (Appendix A: Common Parameters).

Aliases savemodel
save
savestate

* table={castable}

specifies the input data table.

For more information about specifying the table parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

target="variable-name"

specifies the target variable.

trainInMem=True | False

when set to True, stores data in memory when the model is trained.

Default False

treePrior={bart_treePrior}

specifies the regularization prior for the sum-of-trees ensemble.

The bart_treePrior value can be one or more of the following:

"depthBase":double

specifies the base probability for splitting an internal node as a function of its depth from the root. A larger base probability value makes splitting a node more likely.

Default 0.95
Range (0, 1)
"depthPower":double

specifies the power parameter used to compute the probability of splitting an internal node as a function of its depth from the root. A larger depth power value decreases the probability of splitting a node.

Default 2
Minimum value 0
"pPrune":double

specifies the probability of sampling the operation of pruning a pair of terminal nodes for the tree sampling algorithm. If you specify the pSplit and pPrune parameters, their values must sum to 1.

Default 0.5
Range (0, 1)
"pSplit":double

specifies the probability of sampling the operation of splitting a terminal node for the tree sampling algorithm. If you specify the pSplit and pPrune parameters, their values must sum to 1.

Default 0.5
Range (0, 1)

bartProbit Action

Fits probit Bayesian additive regression trees (BART) models to binary distributed response data.

R Syntax

results <- cas.bart.bartProbit(s,
alpha=double,
applyRowOrder=TRUE | FALSE,
attributes=list( list(
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
) <, list(...)>),
class=list( list(
descending=TRUE | FALSE,
order="FORMATTED" | "FREQ" | "FREQFORMATTED" | "FREQINTERNAL" | "INTERNAL",
ref="FIRST" | "LAST" | double | "string",
required parameter vars=list("variable-name-1" <, "variable-name-2", ...>)
) <, list(...)>),
differences=list( list(
required parameter evtMargin="string",
label="string",
name="string",
required parameter refMargin="string"
) <, list(...)>),
display=list(
caseSensitive=TRUE | FALSE,
exclude=TRUE | FALSE,
excludeAll=TRUE | FALSE,
keyIsPath=TRUE | FALSE,
names=list("string-1" <, "string-2", ...>),
pathType="LABEL" | "NAME",
traceNames=TRUE | FALSE
),
freq="variable-name",
inputs=list( list(
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
) <, list(...)>),
leafSigmaK=double,
margins=list( list(
at=list( list(
required parameter value="string" | double,
required parameter var="string"
) <, list(...)>),
label="string",
required parameter name="string"
) <, list(...)>),
maxTrainTime=double,
minLeafSize=integer,
model=list(
depVars=list( list(
name="variable-name"
) <, list(...)>),
effects=list( list(
required parameter vars=list("string-1" <, "string-2", ...>)
) <, list(...)>)
),
nBI=integer,
nBins=integer,
nMC=integer,
nominals=list( list(
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
) <, list(...)>),
nThin=integer,
nTree=integer,
obsLeafMapInMem=TRUE | FALSE,
offset="variable-name",
orderSplit=integer,
output=list(
alpha=double,
avgOnly=TRUE | FALSE,
required parameter casOut=list(
caslib="string"
compress=TRUE | FALSE
indexVars=list("variable-name-1" <, "variable-name-2", ...>)
label="string"
lifetime=64-bit-integer
maxMemSize=64-bit-integer
memoryFormat="DVR" | "INHERIT" | "STANDARD"
name="table-name"
promote=TRUE | FALSE
replace=TRUE | FALSE
replication=integer
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"
threadBlockSize=64-bit-integer
timeStamp="string"
where=list("string-1" <, "string-2", ...>)
),
copyVars="ALL" | "ALL_MODEL" | "ALL_NUMERIC" | list("variable-name-1" <, "variable-name-2", ...>),
into="string",
intoCutPt=double,
lcl="string",
pred="string",
resid="string",
role="string",
ucl="string"
),
outputMargins=list(
caslib="string",
compress=TRUE | FALSE,
indexVars=list("variable-name-1" <, "variable-name-2", ...>),
label="string",
lifetime=64-bit-integer,
maxMemSize=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=TRUE | FALSE,
replace=TRUE | FALSE,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",
threadBlockSize=64-bit-integer,
timeStamp="string",
where=list("string-1" <, "string-2", ...>)
),
outputTables=list(
groupByVarsRaw=TRUE | FALSE,
includeAll=TRUE | FALSE,
names=list("string-1" <, "string-2", ...>) | list(key-1=list(casouttable-1) <, key-2=list(casouttable-2), ...>),
repeated=TRUE | FALSE,
replace=TRUE | FALSE
),
partByFrac=list(
seed=integer,
test=double
),
partByVar=list(
required parameter name="variable-name",
test="string",
train="string"
),
quantileBin=TRUE | FALSE,
sampleSummary=list(
avgNode="string",
required parameter casout=list(
caslib="string"
compress=TRUE | FALSE
indexVars=list("variable-name-1" <, "variable-name-2", ...>)
label="string"
lifetime=64-bit-integer
maxMemSize=64-bit-integer
memoryFormat="DVR" | "INHERIT" | "STANDARD"
name="table-name"
promote=TRUE | FALSE
replace=TRUE | FALSE
replication=integer
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"
threadBlockSize=64-bit-integer
timeStamp="string"
where=list("string-1" <, "string-2", ...>)
),
propAccepted="string",
sampSaved="string"
),
seed=64-bit-integer,
store=list(
caslib="string",
indexVars=list("variable-name-1" <, "variable-name-2", ...>),
label="string",
lifetime=64-bit-integer,
memoryFormat="DVR" | "INHERIT" | "STANDARD",
name="table-name",
promote=TRUE | FALSE,
replace=TRUE | FALSE,
replication=integer,
tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"
),
required parameter table=list(
caslib="string",
computedOnDemand=TRUE | FALSE,
computedVars=list( list(
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
) <, list(...)>),
computedVarsProgram="string",
dataSourceOptions=list(key-1=list(any-list-or-data-type-1) <, key-2=list(any-list-or-data-type-2), ...>),
groupBy=list( list(
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
) <, list(...)>),
groupByMode="NOSORT" | "REDISTRIBUTE",
importOptions=list(fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters),
required parameter name="table-name",
orderBy=list( list(
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
) <, list(...)>),
singlePass=TRUE | FALSE,
vars=list( list(
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
) <, list(...)>),
where="where-expression",
whereTable=list(
casLib="string"
dataSourceOptions=list(adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters)
importOptions=list(fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters)
required parameter name="table-name"
vars=list( list(
format="string",
formattedLength=integer,
label="string",
required parameter name="variable-name",
nfd=integer,
nfl=integer
) <, list(...)>)
where="where-expression"
)
),
target="variable-name",
trainInMem=TRUE | FALSE,
treePrior=list(
depthBase=double,
depthPower=double,
pPrune=double,
pSplit=double
)
)
"required parameter" indicates a required parameter

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables

Parameter

Subparameter

Description

required parameter table

specifies the input data table.

Parameters for Creating Output Tables

Parameter

Subparameter

Description

 output

required parameter casOut

creates a table on the server that contains observationwise statistics, which are computed after the model is fit.

 outputMargins

 outputTables

names

lists the names of results tables to save as CAS tables on the server.

 sampleSummary

required parameter casout

creates a table on the server that contains a summary of the sum-of-trees ensemble samples.

 store

stores the model in a binary table object that you can use for scoring.

Parameter Descriptions

alpha=double

specifies the significance level to use for constructing equal-tail credible limits for predictive margins.

Default 0.05
Range (0, 1)

applyRowOrder=TRUE | FALSE

Default FALSE

attributes=list( list(casinvardesc-1) <, list(casinvardesc-2), ...>)

changes the attributes of variables used in the action. Currently, attributes specified on the inputs and nominal parameters are ignored.

For more information about specifying the attributes parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias attribute

class=list( list(classStatement-1) <, list(classStatement-2), ...>)

names the classification variables to use as explanatory variables in the analysis.

Alias classVars

The classStatement value can be one or more of the following:

descending=TRUE | FALSE

when set to TRUE, reverses the sort order that is imposed by the order parameter.

Default FALSE
order="FORMATTED" | "FREQ" | "FREQFORMATTED" | "FREQINTERNAL" | "INTERNAL"

specifies the sort order for the levels of the classification variable. This ordering determines which parameters in the model correspond to each level in the data.

ref="FIRST" | "LAST" | double | "string"

specifies the reference level to use when you specify a nonsingular parameterization in the param parameter. For an individual variable, you can specify the level of the variable to use as the reference level. If the action supports the global class options parameter, then you can specify FIRST or LAST.

* vars=list("variable-name-1" <, "variable-name-2", ...>)

specifies the classification variables.

Alias name

differences=list( list(bartScoreMargin_scoreDiff-1) <, list(bartScoreMargin_scoreDiff-2), ...>)

specifies differences of predictive margins.

Alias diffs

The bartScoreMargin_scoreDiff value can be one or more of the following:

* evtMargin="string"

specifies the event predictive margin by its name.

Alias evtScen
label="string"

labels the difference in predictive margins in output tables.

name="string"

names the difference in predictive margins in output tables.

* refMargin="string"

specifies the reference predictive margin by its name.

Alias refScen

display=list(displayTables)

specifies a list of results tables to send to the client for display.

For more information about specifying the display parameter, see the common displayTables parameter (Appendix A: Common Parameters).

distributeChains=integer

specifies a distributed mode that divides the MCMC sampling in a grid environment. This mode distributes the training data to workers so that the specified number of workers have a full copy of the training data and run a separate chain. This parameter is not applicable when you are in single-machine mode. When you specify a value of 0, a single chain is run, and each worker node is assigned a portion of the training data.

Minimum value 0

freq="variable-name"

names the numeric variable that contains the frequency of occurrence for each observation.

inputs=list( list(casinvardesc-1) <, list(casinvardesc-2), ...>)

specifies the input variables to use in the analysis.

For more information about specifying the inputs parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias input

leafSigmaK=double

specifies the value used to determine the prior variance for the leaf parameter.

Default 2
Minimum value (exclusive) 0

margins=list( list(bartScoreMargin_evaluate-1) <, list(bartScoreMargin_evaluate-2), ...>)

specifies a predictive margin.

Alias scenarios

The bartScoreMargin_evaluate value can be one or more of the following:

at=list( list(bartScoreMargin_varValue-1) <, list(bartScoreMargin_varValue-2), ...>)

specifies the variables to modify in a predictive margin and the values they are set to.

Alias evaluate

The bartScoreMargin_varValue value can be one or more of the following:

* value="string" | double

specifies the value a variable is set to in the predictive margin. For continuous variables, a numeric value is specified. For classification variables, the formatted level is specified.

* var="string"

names a variable to modify in a predictive margin.

Alias variable
label="string"

labels the predictive margin in output tables.

* name="string"

names the predictive margin in output tables.

maxTrainTime=double

specifies an upper limit (in seconds) on the time for MCMC sampling.

Alias maxTime
Minimum value (exclusive) 0

minLeafSize=integer

specifies the minimum number of observations that each child of a split must contain in the training data in order for the split to be considered.

Alias leafSize
Default 5
Minimum value 1

missing="MACBIG" | "MACSMALL" | "NONE" | "SEPARATE"

specifies how to handle missing values in predictor variables.

Default SEPARATE
MACBIG

during the training phase, treats missing values for continuous predictors as the largest machine value and treats missing values for categorical predictors as a separate level. In the scoring phase, observations that have missing continuous predictor values are assigned to the right branch of the split, and observations that have an unknown categorical predictor level are assigned to the larger branch of the split.

MACSMALL

during the training phase, treats missing values for continuous predictors as the smallest machine value and treats missing values for categorical predictors as a separate level. In the scoring phase, observations that have missing continuous predictor values are assigned to the left branch of the split, and observations that have an unknown categorical predictor level are assigned to the larger branch of the split.

NONE

during the training phase, excludes all observations that have a missing predictor value. In the scoring phase, observations that have missing values or observations that have an unknown categorical predictor level are assigned to the larger branch of the split.

SEPARATE

during the training phase, treats missing values for continuous predictors as a separate group and treats missing values for categorical predictors as a separate level. In the training phase, when a split operation is sampled for a continuous predictor and there are observations that have a missing value of the splitting variable on the node, a primary rule for routing missing values is sampled before the primary splitting rule for nonmissing values is sampled. If a continuous predictor does not have a missing value on the node that you are splitting, a primary rule for routing missing values is not sampled. In the scoring phase, observations that have an unknown categorical predictor level or have a missing continuous predictor value for a node without a primary rule for routing missing values are assigned to the larger branch of the split.

model=list(bartProbitModel)

names the dependent variable and explanatory effects.

The bartProbitModel value can be one or more of the following:

depVars=list( list(responsevar-1) <, list(responsevar-2), ...>)

specifies one or more variables to use as response variables in the model. Not all models support more than one response variable.

Aliases depVar
target
name="variable-name"

names the response variable.

effects=list( list(effect-1) <, list(effect-2), ...>)

specifies a list of effects that define the model. Each term in this list is made up of variables specified in the vars parameter and their interaction (which can be NONE, CROSS, or BAR). When the interaction is BAR, it can be limited by the maxInteract parameter.

* vars=list("string-1" <, "string-2", ...>)

specifies the variables to use in defining a term of the effect. You must specify at least one variable.

nBI=integer

specifies the number of burn-in iterations to perform before the action starts to save samples for prediction.

Alias burnin
Default 100
Minimum value 1

nBins=integer

specifies the number of bins to use for binning continuous input variables.

Default 50
Minimum value 2

nClassLevelsPrint=integer

limits the display of class levels. The value 0 suppresses all levels.

Minimum value 0

nMC=integer

specifies the number of MCMC iterations, excluding the burn-in iterations. This is the MCMC sample size if the thinning rate is 1. This option is ignored if you specify the nMCDist parameter and you run distributed chains.

Default 1000
Minimum value 1

nominals=list( list(casinvardesc-1) <, list(casinvardesc-2), ...>)

specifies the nominal input variables to use in the analysis.

For more information about specifying the nominals parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias nominal

nThin=integer

specifies the thinning rate of the simulation.

Alias thin
Default 1
Minimum value 1

nTree=integer

specifies the number of trees in a sample of the sum-of-trees ensemble.

Default 200
Minimum value 1

obsLeafMapInMem=TRUE | FALSE

when set to TRUE, stores a mapping of each observation to terminal nodes in memory when the model is trained.

Default FALSE

offset="variable-name"

specifies a numeric offset variable. This variable cannot be a classification variable, a response variable, or one of the explanatory variables.

orderSplit=integer

specifies the minimum cardinality for which a categorical input uses splitting rules according to level ordering.

Default 50
Minimum value (exclusive) 0

output=list(bartBinOutputStatement)

creates a table on the server that contains observationwise statistics, which are computed after the model is fit.

The bartBinOutputStatement value can be one or more of the following:

alpha=double

specifies the significance level to use for the construction of all equal-tail credible limits.

Default 0.05
Range (0, 1)
avgOnly=TRUE | FALSE

when set to FALSE, predictions from each MCMC sample are included in the output table in addition to the sample average predictions.

Alias averageOnly
Default TRUE
* casOut=list(casouttable)

specifies the settings for an output table.

For more information about specifying the casOut parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

copyVars="ALL" | "ALL_MODEL" | "ALL_NUMERIC" | list("variable-name-1" <, "variable-name-2", ...>)

specifies a list of one or more variables to be copied from the input table to the output table. You can alternatively specify the value ALL, ALL_MODEL, or ALL_NUMERIC, which respectively copies all variables, all variables used in the modeling, or all numeric variables from the input table to the output table.

into="string"

names the predicted response level. The default name is Into.

intoCutPt=double

specifies the predicted event probability that determines the predicted binary response level.

Default 0.5
Range (0, 1)
lcl="string"

names the equal-tail lower credible limit.

pred="string"

names the predicted value. If you do not specify any output statistics, then the predicted value is named Pred by default.

Aliases p
predicted
resid="string"

names the residual.

Aliases r
residual
role="string"

identifies the training and test roles for observations.

ucl="string"

names the equal-tail upper credible limit.

outputMargins=list(casouttable)

For more information about specifying the outputMargins parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

outputTables=list(outputTables)

lists the names of results tables to save as CAS tables on the server.

For more information about specifying the outputTables parameter, see the common outputTables parameter (Appendix A: Common Parameters).

Alias displayOut

partByFrac=list(partByFracStatement)

specifies the fraction of the data to be used for testing.

The partByFracStatement value can be one or more of the following:

seed=integer

specifies the seed to use in the random number generator that is used for partitioning the data.

Default 0
test=double

randomly assigns the specified proportion of observations in the input table to the testing role. The sum of the fractions that are specified in the test and validate parameters must be less than 1.

Range 0–1

partByVar=list(partByVarStatement)

names the variable and its values used to partition the data into training and testing roles.

Long form partByVar=list(name="variable-name")
Shortcut form partByVar="variable-name"

The partByVarStatement value can be one or more of the following:

* name="variable-name"

names the variable in the input table whose values are used to assign roles to each observation.

test="string"

specifies the formatted value of the variable that is used to assign observations to the testing role.

train="string"

specifies the formatted value of the variable that is used to assign observations to the training role. If you do not specify the train parameter, then all observations whose roles are not determined by the test and validate parameters are assigned to training.

quantileBin=TRUE | FALSE

when set to TRUE, specifies that bin boundaries are set at quantiles of numeric inputs instead of bins of equal width.

Aliases qbin
qtbin
Default TRUE

sampleSummary=list(bartProbit_sampleSummary)

creates a table on the server that contains a summary of the sum-of-trees ensemble samples.

The bartProbit_sampleSummary value can be one or more of the following:

avgNode="string"

names the variable that contains the average number of nodes per tree in the sample.

* casout=list(casouttable)

creates a table on the server that contains a summary of the sum-of-trees ensemble samples.

For more information about specifying the casout parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

propAccepted="string"

names the variable that contains the proportion of accepted tree modifications.

sampSaved="string"

names the variable that contains an indicator for whether the sample is saved for prediction.

seed=64-bit-integer

specifies a seed for starting the pseudorandom number generator.

Default 0
Range 0–4294967295

store=list(casouttablebasic)

stores the model in a binary table object that you can use for scoring.

For more information about specifying the store parameter, see the common casouttablebasic parameter (Appendix A: Common Parameters).

Aliases savemodel
save
savestate

* table=list(castable)

specifies the input data table.

For more information about specifying the table parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

target="variable-name"

specifies the target variable.

trainInMem=TRUE | FALSE

when set to TRUE, stores data in memory when the model is trained.

Default FALSE

treePrior=list(bart_treePrior)

specifies the regularization prior for the sum-of-trees ensemble.

The bart_treePrior value can be one or more of the following:

depthBase=double

specifies the base probability for splitting an internal node as a function of its depth from the root. A larger base probability value makes splitting a node more likely.

Default 0.95
Range (0, 1)
depthPower=double

specifies the power parameter used to compute the probability of splitting an internal node as a function of its depth from the root. A larger depth power value decreases the probability of splitting a node.

Default 2
Minimum value 0
pPrune=double

specifies the probability of sampling the operation of pruning a pair of terminal nodes for the tree sampling algorithm. If you specify the pSplit and pPrune parameters, their values must sum to 1.

Default 0.5
Range (0, 1)
pSplit=double

specifies the probability of sampling the operation of splitting a terminal node for the tree sampling algorithm. If you specify the pSplit and pPrune parameters, their values must sum to 1.

Default 0.5
Range (0, 1)
Last updated: March 05, 2026