Bayesian Additive Regression Trees Action Set

Provides actions for fitting Bayesian additive regression trees models.

bartProbit Action

Fits probit Bayesian additive regression trees (BART) models to binary-distributed response data.

CASL Syntax
Summary: Input and Output Tables
Parameter Descriptions

CASL Syntax

bart.bartProbit <result=results> <status=rc> /

alpha=double,

applyRowOrder=TRUE | FALSE,

attributes={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

class={{

descending=TRUE | FALSE,

order="FORMATTED" | "FREQ" | "FREQFORMATTED" | "FREQINTERNAL" | "INTERNAL",

ref="FIRST" | "LAST" | double | "string",

vars={"variable-name-1" <, "variable-name-2", ...>}

}, {...}},

differences={{

evtMargin="string",

label="string",

name="string",

refMargin="string"

}, {...}},

display={

caseSensitive=TRUE | FALSE,

exclude=TRUE | FALSE,

excludeAll=TRUE | FALSE,

keyIsPath=TRUE | FALSE,

names={"string-1" <, "string-2", ...>},

pathType="LABEL" | "NAME",

traceNames=TRUE | FALSE

},

distributeChains=integer,

freq="variable-name",

inputs={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

leafSigmaK=double,

margins={{

at={{

value="string" | double,

var="string"

}, {...}},

label="string",

name="string"

}, {...}},

maxTrainTime=double,

minLeafSize=integer,

missing="MACBIG" | "MACSMALL" | "NONE" | "SEPARATE",

model={

depVars={{

name="variable-name"

}, {...}},

effects={{

vars={"string-1" <, "string-2", ...>}

}, {...}}

},

nBI=integer,

nBins=integer,

nClassLevelsPrint=integer,

nMC=integer,

nominals={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

nThin=integer,

nTree=integer,

obsLeafMapInMem=TRUE | FALSE,

offset="variable-name",

orderSplit=integer,

output={

alpha=double,

avgOnly=TRUE | FALSE,

casOut={

caslib="string"

compress=TRUE | FALSE

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

copyVars="ALL" | "ALL_MODEL" | "ALL_NUMERIC" | {"variable-name-1" <, "variable-name-2", ...>},

into="string",

intoCutPt=double,

lcl="string",

pred="string",

resid="string",

role="string",

ucl="string"

outputMargins={

caslib="string",

compress=TRUE | FALSE,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

outputTables={

groupByVarsRaw=TRUE | FALSE,

includeAll=TRUE | FALSE,

names={"string-1" <, "string-2", ...>} | {key-1={casouttable-1} <, key-2={casouttable-2}, ...>},

repeated=TRUE | FALSE,

replace=TRUE | FALSE

partByFrac={

seed=integer,

test=double

partByVar={

name="variable-name",

test="string",

train="string"

quantileBin=TRUE | FALSE,

sampleSummary={

avgNode="string",

casout={

caslib="string"

compress=TRUE | FALSE

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

propAccepted="string",

sampSaved="string"

seed=64-bit-integer,

store={

caslib="string",

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

table={

caslib="string",

computedOnDemand=TRUE | FALSE,

computedVars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

computedVarsProgram="string",

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},

groupBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

groupByMode="NOSORT" | "REDISTRIBUTE",

importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters},

name="table-name",

orderBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

singlePass=TRUE | FALSE,

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

where="where-expression",

whereTable={

casLib="string"

dataSourceOptions={adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters}

name="table-name"

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}}

where="where-expression"

}

target="variable-name",

trainInMem=TRUE | FALSE,

treePrior={

depthBase=double,

depthPower=double,

pPrune=double,

pSplit=double

}

;

* indicates a required parameter

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables
Parameter	Subparameter	Description
* table	—	specifies the input data table.

Parameters for Creating Output Tables
Parameter	Subparameter	Description
output	* casOut	creates a table on the server that contains observationwise statistics, which are computed after the model is fit.
outputMargins	—
outputTables	names	lists the names of results tables to save as CAS tables on the server.
sampleSummary	* casout	creates a table on the server that contains a summary of the sum-of-trees ensemble samples.
store	—	stores the model in a binary table object that you can use for scoring.

Parameter Descriptions

alpha=double

specifies the significance level to use for constructing equal-tail credible limits for predictive margins.

Default	0.05
Range	(0, 1)

applyRowOrder=TRUE | FALSE

Default	FALSE

attributes={{casinvardesc-1} <, {casinvardesc-2}, ...>}

changes the attributes of variables used in the action. Currently, attributes specified on the inputs and nominals parameters are ignored.

For more information about specifying the attributes parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias	attribute

class={{classStatement-1} <, {classStatement-2}, ...>}

names the classification variables to use as explanatory variables in the analysis.

Alias	classVars

The classStatement value can be one or more of the following:

descending=TRUE | FALSE

when set to True, reverses the sort order that is imposed by the order parameter.

Default	FALSE

order="FORMATTED" | "FREQ" | "FREQFORMATTED" | "FREQINTERNAL" | "INTERNAL"

specifies the sort order for the levels of the classification variable. This ordering determines which parameters in the model correspond to each level in the data.

ref="FIRST" | "LAST" | double | "string"

specifies the reference level to use when you specify a nonsingular parameterization in the param parameter. For an individual variable, you can specify the level of the variable to use as the reference level. If the action supports the global class options parameter, then you can specify FIRST or LAST.

* vars={"variable-name-1" <, "variable-name-2", ...>}

specifies the classification variables.

Alias	name

differences={{bartScoreMargin_scoreDiff-1} <, {bartScoreMargin_scoreDiff-2}, ...>}

specifies differences of predictive margins.

Alias	diffs

The bartScoreMargin_scoreDiff value can be one or more of the following:

* evtMargin="string"

specifies the event predictive margin by its name.

Alias	evtScen

label="string"

labels the difference in predictive margins in output tables.

name="string"

names the difference in predictive margins in output tables.

* refMargin="string"

specifies the reference predictive margin by its name.

Alias	refScen

display={displayTables}

specifies a list of results tables to send to the client for display.

For more information about specifying the display parameter, see the common displayTables parameter (Appendix A: Common Parameters).

distributeChains=integer

specifies a distributed mode that divides the MCMC sampling in a grid environment. This mode distributes the training data to workers so that the specified number of workers have a full copy of the training data and run a separate chain. This parameter is not applicable when you are in single-machine mode. When you specify a value of 0, a single chain is run, and each worker node is assigned a portion of the training data.

Minimum value	0

freq="variable-name"

names the numeric variable that contains the frequency of occurrence for each observation.

inputs={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the input variables to use in the analysis.

For more information about specifying the inputs parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias	input

leafSigmaK=double

specifies the value used to determine the prior variance for the leaf parameter.

Default	2
Minimum value (exclusive)	0

margins={{bartScoreMargin_evaluate-1} <, {bartScoreMargin_evaluate-2}, ...>}

specifies a predictive margin.

Alias	scenarios

The bartScoreMargin_evaluate value can be one or more of the following:

at={{bartScoreMargin_varValue-1} <, {bartScoreMargin_varValue-2}, ...>}

specifies the variables to modify in a predictive margin and the values they are set to.

Alias	evaluate

The bartScoreMargin_varValue value can be one or more of the following:

* value="string" | double

specifies the value a variable is set to in the predictive margin. For continuous variables, a numeric value is specified. For classification variables, the formatted level is specified.

* var="string"

names a variable to modify in a predictive margin.

Alias	variable

label="string"

labels the predictive margin in output tables.

* name="string"

names the predictive margin in output tables.

maxTrainTime=double

specifies an upper limit (in seconds) on the time for MCMC sampling.

Alias	maxTime
Minimum value (exclusive)	0

minLeafSize=integer

specifies the minimum number of observations that each child of a split must contain in the training data in order for the split to be considered.

Alias	leafSize
Default	5
Minimum value	1

missing="MACBIG" | "MACSMALL" | "NONE" | "SEPARATE"

specifies how to handle missing values in predictor variables.

Default	SEPARATE

MACBIG

during the training phase, treats missing values for continuous predictors as the largest machine value and treats missing values for categorical predictors as a separate level. In the scoring phase, observations that have missing continuous predictor values are assigned to the right branch of the split, and observations that have an unknown categorical predictor level are assigned to the larger branch of the split.

MACSMALL

during the training phase, treats missing values for continuous predictors as the smallest machine value and treats missing values for categorical predictors as a separate level. In the scoring phase, observations that have missing continuous predictor values are assigned to the left branch of the split, and observations that have an unknown categorical predictor level are assigned to the larger branch of the split.

NONE

during the training phase, excludes all observations that have a missing predictor value. In the scoring phase, observations that have missing values or that have an unknown categorical predictor level are assigned to the larger branch of the split.

SEPARATE

during the training phase, treats missing values for continuous predictors as a separate group and treats missing values for categorical predictors as a separate level. In the training phase, when a split operation is sampled for a continuous predictor and there are observations that have a missing value of the splitting variable on the node, a primary rule for routing missing values is sampled before the primary splitting rule for nonmissing values is sampled. If a continuous predictor does not have a missing value on the node that you are splitting, a primary rule for routing missing values is not sampled. In the scoring phase, observations that have an unknown categorical predictor level or have a missing continuous predictor value for a node without a primary rule for routing missing values are assigned to the larger branch of the split.

model={bartProbitModel}

names the dependent variable and explanatory effects.

The bartProbitModel value can be one or more of the following:

depVars={{responsevar-1} <, {responsevar-2}, ...>}

specifies one or more variables to use as response variables in the model. Not all models support more than one response variable.

Aliases	depVar
Aliases	target

name="variable-name"

names the response variable.

effects={{effect-1} <, {effect-2}, ...>}

specifies a list of effects that define the model. Each term in this list is made up of variables specified in the vars parameter and their interaction (which can be NONE, CROSS, or BAR). When the interaction is BAR, it can be limited by the maxInteract parameter.

* vars={"string-1" <, "string-2", ...>}

specifies the variables to use in defining a term of the effect. You must specify at least one variable.

nBI=integer

specifies the number of burn-in iterations to perform before the action starts to save samples for prediction.

Alias	burnin
Default	100
Minimum value	1

nBins=integer

specifies the number of bins to use for binning continuous input variables.

Default	50
Minimum value	2

nClassLevelsPrint=integer

limits the display of class levels. The value 0 suppresses all levels.

Minimum value	0

nMC=integer

specifies the number of MCMC iterations, excluding the burn-in iterations. This is the MCMC sample size if the thinning rate is 1. This option is ignored if you specify the nMCDist parameter and you run distributed chains.

Default	1000
Minimum value	1

nominals={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the nominal input variables to use in the analysis.

For more information about specifying the nominals parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias	nominal

nThin=integer

specifies the thinning rate of the simulation.

Alias	thin
Default	1
Minimum value	1

nTree=integer

specifies the number of trees in a sample of the sum-of-trees ensemble.

Default	200
Minimum value	1

obsLeafMapInMem=TRUE | FALSE

when set to True, stores a mapping of each observation to terminal nodes in memory when the model is trained.

Default	FALSE

offset="variable-name"

specifies a numeric offset variable. This variable cannot be a classification variable, a response variable, or one of the explanatory variables.

orderSplit=integer

specifies the minimum cardinality for which a categorical input uses splitting rules according to level ordering.

Default	50
Minimum value (exclusive)	0

output={bartBinOutputStatement}

creates a table on the server that contains observationwise statistics, which are computed after the model is fit.

The bartBinOutputStatement value can be one or more of the following:

alpha=double

specifies the significance level to use for the construction of all equal-tail credible limits.

Default	0.05
Range	(0, 1)

avgOnly=TRUE | FALSE

when set to FALSE, predictions from each MCMC sample are included in the output table in addition to the sample average predictions.

Alias	averageOnly
Default	TRUE

* casOut={casouttable}

specifies the settings for an output table.

For more information about specifying the casOut parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

copyVars="ALL" | "ALL_MODEL" | "ALL_NUMERIC" | {"variable-name-1" <, "variable-name-2", ...>}

specifies a list of one or more variables to be copied from the input table to the output table. You can alternatively specify the value ALL, ALL_MODEL, or ALL_NUMERIC, which respectively copies all variables, all variables used in the modeling, or all numeric variables from the input table to the output table.

into="string"

names the predicted response level. The default name is Into.

intoCutPt=double

specifies the predicted event probability that determines the predicted binary response level.

Default	0.5
Range	(0, 1)

lcl="string"

names the equal-tail lower credible limit.

pred="string"

names the predicted value. If you do not specify any output statistics, then the predicted value is named Pred by default.

Aliases	p
Aliases	predicted

resid="string"

names the residual.

Aliases	r
Aliases	residual

role="string"

identifies the training and test roles for observations.

ucl="string"

names the equal-tail upper credible limit.

outputMargins={casouttable}

For more information about specifying the outputMargins parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

outputTables={outputTables}

lists the names of results tables to save as CAS tables on the server.

For more information about specifying the outputTables parameter, see the common outputTables parameter (Appendix A: Common Parameters).

Alias	displayOut

partByFrac={partByFracStatement}

specifies the fraction of the data to be used for testing.

The partByFracStatement value can be one or more of the following:

seed=integer

specifies the seed to use in the random number generator that is used for partitioning the data.

Default	0

test=double

randomly assigns the specified proportion of observations in the input table to the testing role. The sum of the fractions that are specified in the test and validate parameters must be less than 1.

Range	0–1

partByVar={partByVarStatement}

names the variable and its values used to partition the data into training and testing roles.

Long form	partByVar={name="variable-name"}
Shortcut form	partByVar="variable-name"

The partByVarStatement value can be one or more of the following:

* name="variable-name"

names the variable in the input table whose values are used to assign roles to each observation.

test="string"

specifies the formatted value of the variable that is used to assign observations to the testing role.

train="string"

specifies the formatted value of the variable that is used to assign observations to the training role. If you do not specify the train parameter, then all observations whose roles are not determined by the test and validate parameters are assigned to training.

quantileBin=TRUE | FALSE

when set to True, specifies that bin boundaries are set at quantiles of numeric inputs instead of bins of equal width.

Aliases	qbin
Aliases	qtbin
Default	TRUE

sampleSummary={bartProbit_sampleSummary}

creates a table on the server that contains a summary of the sum-of-trees ensemble samples.

The bartProbit_sampleSummary value can be one or more of the following:

avgNode="string"

names the variable that contains the average number of nodes per tree in the sample.

* casout={casouttable}

creates a table on the server that contains a summary of the sum-of-trees ensemble samples.

For more information about specifying the casout parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

propAccepted="string"

names the variable that contains the proportion of accepted tree modifications.

sampSaved="string"

names the variable that contains an indicator for whether the sample is saved for prediction.

seed=64-bit-integer

specifies a seed for starting the pseudorandom number generator.

Default	0
Range	0–4294967295

store={casouttablebasic}

stores the model in a binary table object that you can use for scoring.

For more information about specifying the store parameter, see the common casouttablebasic parameter (Appendix A: Common Parameters).

Aliases	savemodel
	save
	savestate

* table={castable}

specifies the input data table.

For more information about specifying the table parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

target="variable-name"

specifies the target variable.

trainInMem=TRUE | FALSE

when set to True, stores data in memory when the model is trained.

Default	FALSE

treePrior={bart_treePrior}

specifies the regularization prior for the sum-of-trees ensemble.

The bart_treePrior value can be one or more of the following:

depthBase=double

specifies the base probability for splitting an internal node as a function of its depth from the root. A larger base probability value makes splitting a node more likely.

Default	0.95
Range	(0, 1)

depthPower=double

specifies the power parameter used to compute the probability of splitting an internal node as a function of its depth from the root. A larger depth power value decreases the probability of splitting a node.

Default	2
Minimum value	0

pPrune=double

specifies the probability of sampling the operation of pruning a pair of terminal nodes for the tree sampling algorithm. If you specify the pSplit and pPrune parameters, their values must sum to 1.

Default	0.5
Range	(0, 1)

pSplit=double

specifies the probability of sampling the operation of splitting a terminal node for the tree sampling algorithm. If you specify the pSplit and pPrune parameters, their values must sum to 1.

Default	0.5
Range	(0, 1)

bartProbit Action

Fits probit Bayesian additive regression trees (BART) models to binary-distributed response data.

Lua Syntax
Summary: Input and Output Tables
Parameter Descriptions

Lua Syntax

results, info = s:bart_bartProbit{

alpha=double,

applyRowOrder=true | false,

attributes={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

class={{

descending=true | false,

order="FORMATTED" | "FREQ" | "FREQFORMATTED" | "FREQINTERNAL" | "INTERNAL",

ref="FIRST" | "LAST" | double | "string",

vars={"variable-name-1" <, "variable-name-2", ...>}

}, {...}},

differences={{

evtMargin="string",

label="string",

name="string",

refMargin="string"

}, {...}},

display={

caseSensitive=true | false,

exclude=true | false,

excludeAll=true | false,

keyIsPath=true | false,

names={"string-1" <, "string-2", ...>},

pathType="LABEL" | "NAME",

traceNames=true | false

},

distributeChains=integer,

freq="variable-name",

inputs={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

leafSigmaK=double,

margins={{

at={{

value="string" | double,

var="string"

}, {...}},

label="string",

name="string"

}, {...}},

maxTrainTime=double,

minLeafSize=integer,

missing="MACBIG" | "MACSMALL" | "NONE" | "SEPARATE",

model={

depVars={{

name="variable-name"

}, {...}},

effects={{

vars={"string-1" <, "string-2", ...>}

}, {...}}

},

nBI=integer,

nBins=integer,

nClassLevelsPrint=integer,

nMC=integer,

nominals={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

nThin=integer,

nTree=integer,

obsLeafMapInMem=true | false,

offset="variable-name",

orderSplit=integer,

output={

alpha=double,

avgOnly=true | false,

casOut={

caslib="string"

compress=true | false

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=true | false

replace=true | false

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

copyVars="ALL" | "ALL_MODEL" | "ALL_NUMERIC" | {"variable-name-1" <, "variable-name-2", ...>},

into="string",

intoCutPt=double,

lcl="string",

pred="string",

resid="string",

role="string",

ucl="string"

outputMargins={

caslib="string",

compress=true | false,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=true | false,

replace=true | false,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

outputTables={

groupByVarsRaw=true | false,

includeAll=true | false,

names={"string-1" <, "string-2", ...>} | {key-1={casouttable-1} <, key-2={casouttable-2}, ...>},

repeated=true | false,

replace=true | false

partByFrac={

seed=integer,

test=double

partByVar={

name="variable-name",

test="string",

train="string"

quantileBin=true | false,

sampleSummary={

avgNode="string",

casout={

caslib="string"

compress=true | false

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=true | false

replace=true | false

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

propAccepted="string",

sampSaved="string"

seed=64-bit-integer,

store={

caslib="string",

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=true | false,

replace=true | false,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

table={

caslib="string",

computedOnDemand=true | false,

computedVars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

computedVarsProgram="string",

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},

groupBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

groupByMode="NOSORT" | "REDISTRIBUTE",

name="table-name",

orderBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

singlePass=true | false,

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

where="where-expression",

whereTable={

casLib="string"

name="table-name"

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}}

where="where-expression"

}

target="variable-name",

trainInMem=true | false,

treePrior={

depthBase=double,

depthPower=double,

pPrune=double,

pSplit=double

}

* indicates a required parameter

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables
Parameter	Subparameter	Description
* table	—	specifies the input data table.

Parameters for Creating Output Tables
Parameter	Subparameter	Description
output	* casOut	creates a table on the server that contains observationwise statistics, which are computed after the model is fit.
outputMargins	—
outputTables	names	lists the names of results tables to save as CAS tables on the server.
sampleSummary	* casout	creates a table on the server that contains a summary of the sum-of-trees ensemble samples.
store	—	stores the model in a binary table object that you can use for scoring.

Parameter Descriptions

alpha=double

specifies the significance level to use for constructing equal-tail credible limits for predictive margins.

Default	0.05
Range	(0, 1)

applyRowOrder=true | false

Default	false

attributes={{casinvardesc-1} <, {casinvardesc-2}, ...>}

changes the attributes of variables used in the action. Currently, attributes specified on the inputs and nominals parameters are ignored.

For more information about specifying the attributes parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias	attribute

class={{classStatement-1} <, {classStatement-2}, ...>}

names the classification variables to use as explanatory variables in the analysis.

Alias	classVars

The classStatement value can be one or more of the following:

descending=true | false

when set to True, reverses the sort order that is imposed by the order parameter.

Default	false

order="FORMATTED" | "FREQ" | "FREQFORMATTED" | "FREQINTERNAL" | "INTERNAL"

specifies the sort order for the levels of the classification variable. This ordering determines which parameters in the model correspond to each level in the data.

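ref="FIRST" | "LAST" | double | "string"

specifies the reference level to use when you specify a nonsingular parameterization in the param parameter. For an individual variable, you can specify the level of the variable to use as the reference level. If the action supports the global class options parameter, then you can specify FIRST or LAST.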

* vars={"variable-name-1" <, "variable-name-2", ...>}

specifies the classification variables.

Alias	name

differences={{bartScoreMargin_scoreDiff-1} <, {bartScoreMargin_scoreDiff-2}, ...>}

specifies differences of predictive margins.

Alias	diffs

The bartScoreMargin_scoreDiff value can be one or more of the following:

* evtMargin="string"

specifies the event predictive margin by its name.

Alias	evtScen

label="string"

labels the difference in predictive margins in output tables.

name="string"

names the difference in predictive margins in output tables.

* refMargin="string"

specifies the reference predictive margin by its name.

Alias	refScen

display={displayTables}

specifies a list of results tables to send to the client for display.

For more information about specifying the display parameter, see the common displayTables parameter (Appendix A: Common Parameters).

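distributeChains=integer

specifies a distributed mode that divides the MCMC sampling in a grid environment. This mode distributes the training data to workers so that the specified number of workers have a full copy of the training data and run a separate chain. This parameter is not applicable when you are in single-machine mode. When you specify a value of 0, a single chain is run, and each worker node is assigned a portion of the training data.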

Minimum value	0

freq="variable-name"

names the numeric variable that contains the frequency of occurrence for each observation.

inputs={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the input variables to use in the analysis.

For more information about specifying the inputs parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias	input

leafSigmaK=double

specifies the value used to determine the prior variance for the leaf parameter.

Default	2
Minimum value (exclusive)	0

margins={{bartScoreMargin_evaluate-1} <, {bartScoreMargin_evaluate-2}, ...>}

specifies a predictive margin.

Alias	scenarios

The bartScoreMargin_evaluate value can be one or more of the following:

at={{bartScoreMargin_varValue-1} <, {bartScoreMargin_varValue-2}, ...>}

specifies the variables to modify in a predictive margin and the values they are set to.

Alias	evaluate

The bartScoreMargin_varValue value can be one or more of the following:

* value="string" | double

specifies the value a variable is set to in the predictive margin. For continuous variables, a numeric value is specified. For classification variables, the formatted level is specified.

* var="string"

names a variable to modify in a predictive margin.

Alias	variable

label="string"

labels the predictive margin in output tables.

* name="string"

names the predictive margin in output tables.

maxTrainTime=double

specifies an upper limit (in seconds) on the time for MCMC sampling.

Alias	maxTime
Minimum value (exclusive)	0

minLeafSize=integer

specifies the minimum number of observations that each child of a split must contain in the training data in order for the split to be considered.

Alias	leafSize
Default	5
Minimum value	1

missing="MACBIG" | "MACSMALL" | "NONE" | "SEPARATE"

specifies how to handle missing values in predictor variables.

Default	SEPARATE

MACBIG

MACSMALL

NONE

SEPARATE

model={bartProbitModel}

names the dependent variable and explanatory effects.

The bartProbitModel value can be one or more of the following:

depVars={{responsevar-1} <, {responsevar-2}, ...>}

specifies one or more variables to use as response variables in the model. Not all models support more than one response variable.

Aliases	depVar
Aliases	target

name="variable-name"

names the response variable.

effects={{effect-1} <, {effect-2}, ...>}

* vars={"string-1" <, "string-2", ...>}

specifies the variables to use in defining a term of the effect. You must specify at least one variable.

nBI=integer

specifies the number of burn-in iterations to perform before the action starts to save samples for prediction.

Alias	burnin
Default	100
Minimum value	1

nBins=integer

specifies the number of bins to use for binning continuous input variables.

Default	50
Minimum value	2

nClassLevelsPrint=integer

limits the display of class levels. The value 0 suppresses all levels.

Minimum value	0

nMC=integer

Default	1000
Minimum value	1

nominals={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the nominal input variables to use in the analysis.

For more information about specifying the nominals parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias	nominal

nThin=integer

specifies the thinning rate of the simulation.

Alias	thin
Default	1
Minimum value	1

nTree=integer

specifies the number of trees in a sample of the sum-of-trees ensemble.

Default	200
Minimum value	1

obsLeafMapInMem=true | false

when set to True, stores a mapping of each observation to terminal nodes in memory when the model is trained.

Default	false

offset="variable-name"

specifies a numeric offset variable. This variable cannot be a classification variable, a response variable, or one of the explanatory variables.

orderSplit=integer

specifies the minimum cardinality for which a categorical input uses splitting rules according to level ordering.

Default	50
Minimum value (exclusive)	0

output={bartBinOutputStatement}

creates a table on the server that contains observationwise statistics, which are computed after the model is fit.

The bartBinOutputStatement value can be one or more of the following:

alpha=double

specifies the significance level to use for the construction of all equal-tail credible limits.

Default	0.05
Range	(0, 1)

avgOnly=true | false

when set to FALSE, predictions from each MCMC sample are included in the output table in addition to the sample average predictions.

Alias	averageOnly
Default	true

* casOut={casouttable}

specifies the settings for an output table.

For more information about specifying the casOut parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

copyVars="ALL" | "ALL_MODEL" | "ALL_NUMERIC" | {"variable-name-1" <, "variable-name-2", ...>}

into="string"

names the predicted response level. The default name is Into.

intoCutPt=double

specifies the predicted event probability that determines the predicted binary response level.

Default	0.5
Range	(0, 1)

lcl="string"

names the equal-tail lower credible limit.

pred="string"

names the predicted value. If you do not specify any output statistics, then the predicted value is named Pred by default.

Aliases	p
Aliases	predicted

resid="string"

names the residual.

Aliases	r
Aliases	residual

role="string"

identifies the training and test roles for observations.

ucl="string"

names the equal-tail upper credible limit.

outputMargins={casouttable}

For more information about specifying the outputMargins parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

outputTables={outputTables}

lists the names of results tables to save as CAS tables on the server.

For more information about specifying the outputTables parameter, see the common outputTables parameter (Appendix A: Common Parameters).

Alias	displayOut

partByFrac={partByFracStatement}

specifies the fraction of the data to be used for testing.

The partByFracStatement value can be one or more of the following:

seed=integer

specifies the seed to use in the random number generator that is used for partitioning the data.

Default	0

test=double

randomly assigns the specified proportion of observations in the input table to the testing role. The sum of the fractions that are specified in the test and validate parameters must be less than 1.

Range	0–1

partByVar={partByVarStatement}

names the variable and its values used to partition the data into training and testing roles.

Long form	partByVar={name="variable-name"}
Shortcut form	partByVar="variable-name"

The partByVarStatement value can be one or more of the following:

* name="variable-name"

names the variable in the input table whose values are used to assign roles to each observation.

test="string"

specifies the formatted value of the variable that is used to assign observations to the testing role.

train="string"

quantileBin=true | false

when set to True, specifies that bin boundaries are set at quantiles of numeric inputs instead of bins of equal width.

Aliases	qbin
Aliases	qtbin
Default	true

sampleSummary={bartProbit_sampleSummary}

creates a table on the server that contains a summary of the sum-of-trees ensemble samples.

The bartProbit_sampleSummary value can be one or more of the following:

avgNode="string"

names the variable that contains the average number of nodes per tree in the sample.

* casout={casouttable}

creates a table on the server that contains a summary of the sum-of-trees ensemble samples.

For more information about specifying the casout parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

propAccepted="string"

names the variable that contains the proportion of accepted tree modifications.

sampSaved="string"

names the variable that contains an indicator for whether the sample is saved for prediction.

seed=64-bit-integer

specifies a seed for starting the pseudorandom number generator.

Default	0
Range	0–4294967295

store={casouttablebasic}

stores the model in a binary table object that you can use for scoring.

For more information about specifying the store parameter, see the common casouttablebasic parameter (Appendix A: Common Parameters).

Aliases	savemodel
	save
	savestate

* table={castable}

specifies the input data table.

For more information about specifying the table parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

target="variable-name"

specifies the target variable.

trainInMem=true | false

when set to True, stores data in memory when the model is trained.

Default	false

treePrior={bart_treePrior}

specifies the regularization prior for the sum-of-trees ensemble.

The bart_treePrior value can be one or more of the following:

depthBase=double

specifies the base probability for splitting an internal node as a function of its depth from the root. A larger base probability value makes splitting a node more likely.

Default	0.95
Range	(0, 1)

depthPower=double

Default	2
Minimum value	0

pPrune=double

specifies the probability of sampling the operation of pruning a pair of terminal nodes for the tree sampling algorithm. If you specify the pSplit and pPrune parameters, their values must sum to 1.

Default	0.5
Range	(0, 1)

pSplit=double

specifies the probability of sampling the operation of splitting a terminal node for the tree sampling algorithm. If you specify the pSplit and pPrune parameters, their values must sum to 1.

Default	0.5
Range	(0, 1)

bartProbit Action

Fits probit Bayesian additive regression trees (BART) models to binary distributed response data.

Python Syntax
Summary: Input and Output Tables
Parameter Descriptions

Python Syntax

results=s.bart.bartProbit(

alpha=double,

applyRowOrder=True | False,

attributes=[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

class_=[{

"descending":True | False,

"order":"FORMATTED" | "FREQ" | "FREQFORMATTED" | "FREQINTERNAL" | "INTERNAL",

"ref":"FIRST" | "LAST" | double | "string",

"vars":["variable-name-1" <, "variable-name-2", ...>]

}<, {...}>],

differences=[{

"evtMargin":"string",

"label":"string",

"name":"string",

"refMargin":"string"

}<, {...}>],

display={

"caseSensitive":True | False,

"exclude":True | False,

"excludeAll":True | False,

"keyIsPath":True | False,

"names":["string-1" <, "string-2", ...>],

"pathType":"LABEL" | "NAME",

"traceNames":True | False

distributeChains=integer,

freq="variable-name",

inputs=[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

leafSigmaK=double,

margins=[{

"at":[{

"value":"string" | double,

"var":"string"

}<, {...}>],

"label":"string",

"name":"string"

}<, {...}>],

maxTrainTime=double,

minLeafSize=integer,

missing="MACBIG" | "MACSMALL" | "NONE" | "SEPARATE",

model={

"depVars":[{

"name":"variable-name"

}<, {...}>],

"effects":[{

"vars":["string-1" <, "string-2", ...>]

}<, {...}>]

nBI=integer,

nBins=integer,

nClassLevelsPrint=integer,

nMC=integer,

nominals=[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

nThin=integer,

nTree=integer,

obsLeafMapInMem=True | False,

offset="variable-name",

orderSplit=integer,

output={

"alpha":double,

"avgOnly":True | False,

"casOut":{

"caslib":"string"

"compress":True | False

"indexVars":["variable-name-1" <, "variable-name-2", ...>]

"label":"string"

"lifetime":64-bit-integer

"maxMemSize":64-bit-integer

"memoryFormat":"DVR" | "INHERIT" | "STANDARD"

"name":"table-name"

"promote":True | False

"replace":True | False

"replication":integer

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE"

"threadBlockSize":64-bit-integer

"timeStamp":"string"

"where":["string-1" <, "string-2", ...>]

"copyVars":"ALL" | "ALL_MODEL" | "ALL_NUMERIC" | ["variable-name-1" <, "variable-name-2", ...>],

"into":"string",

"intoCutPt":double,

"lcl":"string",

"pred":"string",

"resid":"string",

"role":"string",

"ucl":"string"

outputMargins={

"caslib":"string",

"compress":True | False,

"indexVars":["variable-name-1" <, "variable-name-2", ...>],

"label":"string",

"lifetime":64-bit-integer,

"maxMemSize":64-bit-integer,

"memoryFormat":"DVR" | "INHERIT" | "STANDARD",

"name":"table-name",

"promote":True | False,

"replace":True | False,

"replication":integer,

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",

"threadBlockSize":64-bit-integer,

"timeStamp":"string",

"where":["string-1" <, "string-2", ...>]

outputTables={

"groupByVarsRaw":True | False,

"includeAll":True | False,

"names":["string-1" <, "string-2", ...>] | {"key-1":{casouttable-1} <, "key-2":{casouttable-2}, ...>},

"repeated":True | False,

"replace":True | False

partByFrac={

"seed":integer,

"test":double

partByVar={

"name":"variable-name",

"test":"string",

"train":"string"

quantileBin=True | False,

sampleSummary={

"avgNode":"string",

"casout":{

"caslib":"string"

"compress":True | False

"indexVars":["variable-name-1" <, "variable-name-2", ...>]

"label":"string"

"lifetime":64-bit-integer

"maxMemSize":64-bit-integer

"memoryFormat":"DVR" | "INHERIT" | "STANDARD"

"name":"table-name"

"promote":True | False

"replace":True | False

"replication":integer

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE"

"threadBlockSize":64-bit-integer

"timeStamp":"string"

"where":["string-1" <, "string-2", ...>]

"propAccepted":"string",

"sampSaved":"string"

seed=64-bit-integer,

store={

"caslib":"string",

"indexVars":["variable-name-1" <, "variable-name-2", ...>],

"label":"string",

"lifetime":64-bit-integer,

"memoryFormat":"DVR" | "INHERIT" | "STANDARD",

"name":"table-name",

"promote":True | False,

"replace":True | False,

"replication":integer,

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE"

table={

"caslib":"string",

"computedOnDemand":True | False,

"computedVars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"computedVarsProgram":"string",

"dataSourceOptions":{"key-1":{any-list-or-data-type-1} <, "key-2":{any-list-or-data-type-2}, ...>},

"groupBy":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"groupByMode":"NOSORT" | "REDISTRIBUTE",

"importOptions":{"fileType":"ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters},

"name":"table-name",

"orderBy":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"singlePass":True | False,

"vars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"where":"where-expression",

"whereTable":{

"casLib":"string"

"dataSourceOptions":{adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters}

"name":"table-name"

"vars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>]

"where":"where-expression"

}

target="variable-name",

trainInMem=True | False,

treePrior={

"depthBase":double,

"depthPower":double,

"pPrune":double,

"pSplit":double

}

)

* indicates a required parameter

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables
Parameter	Subparameter	Description
* table	—	specifies the input data table.

Parameters for Creating Output Tables
Parameter	Subparameter	Description
output	* casOut	creates a table on the server that contains observationwise statistics, which are computed after the model is fit.
outputMargins	—
outputTables	names	lists the names of results tables to save as CAS tables on the server.
sampleSummary	* casout	creates a table on the server that contains a summary of the sum-of-trees ensemble samples.
store	—	stores the model in a binary table object that you can use for scoring.

Parameter Descriptions

alpha=double

specifies the significance level to use for constructing equal-tail credible limits for predictive margins.

Default	0.05
Range	(0, 1)

applyRowOrder=True | False

Default	False

attributes=[{casinvardesc-1} <, {casinvardesc-2}, ...>]

changes the attributes of variables used in the action. Currently, attributes specified on the inputs and nominals parameters are ignored.

For more information about specifying the attributes parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias	attribute

class_=[{classStatement-1} <, {classStatement-2}, ...>]

names the classification variables to use as explanatory variables in the analysis.

Alias	classVars

The classStatement value can be one or more of the following:

"descending":True | False

when set to True, reverses the sort order that is imposed by the order parameter.

Default	False

"order":"FORMATTED" | "FREQ" | "FREQFORMATTED" | "FREQINTERNAL" | "INTERNAL"

specifies the sort order for the levels of the classification variable. This ordering determines which parameters in the model correspond to each level in the data.

"ref":"FIRST" | "LAST" | double | "string"

* "vars":["variable-name-1" <, "variable-name-2", ...>]

specifies the classification variables.

Alias	name

differences=[{bartScoreMargin_scoreDiff-1} <, {bartScoreMargin_scoreDiff-2}, ...>]

specifies differences of predictive margins.

Alias	diffs

The bartScoreMargin_scoreDiff value can be one or more of the following:

* "evtMargin":"string"

specifies the event predictive margin by its name.

Alias	evtScen

"label":"string"

labels the difference in predictive margins in output tables.

"name":"string"

names the difference in predictive margins in output tables.

* "refMargin":"string"

specifies the reference predictive margin by its name.

Alias	refScen

display={displayTables}

specifies a list of results tables to send to the client for display.

For more information about specifying the display parameter, see the common displayTables parameter (Appendix A: Common Parameters).

distributeChains=integer

Minimum value	0

freq="variable-name"

names the numeric variable that contains the frequency of occurrence for each observation.

inputs=[{casinvardesc-1} <, {casinvardesc-2}, ...>]

specifies the input variables to use in the analysis.

For more information about specifying the inputs parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias	input

leafSigmaK=double

specifies the value used to determine the prior variance for the leaf parameter.

Default	2
Minimum value (exclusive)	0

margins=[{bartScoreMargin_evaluate-1} <, {bartScoreMargin_evaluate-2}, ...>]

specifies a predictive margin.

Alias	scenarios

The bartScoreMargin_evaluate value can be one or more of the following:

"at":[{bartScoreMargin_varValue-1} <, {bartScoreMargin_varValue-2}, ...>]

specifies the variables to modify in a predictive margin and the values they are set to.

Alias	evaluate

The bartScoreMargin_varValue value can be one or more of the following:

* "value":"string" | double

specifies the value a variable is set to in the predictive margin. For continuous variables, a numeric value is specified. For classification variables, the formatted level is specified.

* "var":"string"

names a variable to modify in a predictive margin.

Alias	variable

"label":"string"

labels the predictive margin in output tables.

* "name":"string"

names the predictive margin in output tables.

maxTrainTime=double

specifies an upper limit (in seconds) on the time for MCMC sampling.

Alias	maxTime
Minimum value (exclusive)	0

minLeafSize=integer

specifies the minimum number of observations that each child of a split must contain in the training data in order for the split to be considered.

Alias	leafSize
Default	5
Minimum value	1

missing="MACBIG" | "MACSMALL" | "NONE" | "SEPARATE"

specifies how to handle missing values in predictor variables.

Default	SEPARATE

MACBIG

MACSMALL

NONE

SEPARATE

model={bartProbitModel}

names the dependent variable and explanatory effects.

The bartProbitModel value can be one or more of the following:

"depVars":[{responsevar-1} <, {responsevar-2}, ...>]

specifies one or more variables to use as response variables in the model. Not all models support more than one response variable.

Aliases	depVar
Aliases	target

"name":"variable-name"

names the response variable.

"effects":[{effect-1} <, {effect-2}, ...>]

* "vars":["string-1" <, "string-2", ...>]

specifies the variables to use in defining a term of the effect. You must specify at least one variable.

nBI=integer

specifies the number of burn-in iterations to perform before the action starts to save samples for prediction.

Alias	burnin
Default	100
Minimum value	1

nBins=integer

specifies the number of bins to use for binning continuous input variables.

Default	50
Minimum value	2

nClassLevelsPrint=integer

limits the display of class levels. The value 0 suppresses all levels.

Minimum value	0

nMC=integer

Default	1000
Minimum value	1

nominals=[{casinvardesc-1} <, {casinvardesc-2}, ...>]

specifies the nominal input variables to use in the analysis.

For more information about specifying the nominals parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias	nominal

nThin=integer

specifies the thinning rate of the simulation.

Alias	thin
Default	1
Minimum value	1

nTree=integer

specifies the number of trees in a sample of the sum-of-trees ensemble.

Default	200
Minimum value	1

obsLeafMapInMem=True | False

when set to True, stores a mapping of each observation to terminal nodes in memory when the model is trained.

Default	False

offset="variable-name"

specifies a numeric offset variable. This variable cannot be a classification variable, a response variable, or one of the explanatory variables.

orderSplit=integer

specifies the minimum cardinality for which a categorical input uses splitting rules according to level ordering.

Default	50
Minimum value (exclusive)	0

output={bartBinOutputStatement}

creates a table on the server that contains observationwise statistics, which are computed after the model is fit.

The bartBinOutputStatement value can be one or more of the following:

"alpha":double

specifies the significance level to use for the construction of all equal-tail credible limits.

Default	0.05
Range	(0, 1)

"avgOnly":True | False

when set to False, predictions from each MCMC sample are included in the output table in addition to the sample average predictions.

Alias	averageOnly
Default	True

* "casOut":{casouttable}

specifies the settings for an output table.

For more information about specifying the casOut parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

"copyVars":"ALL" | "ALL_MODEL" | "ALL_NUMERIC" | ["variable-name-1" <, "variable-name-2", ...>]

"into":"string"

names the predicted response level. The default name is Into.

"intoCutPt":double

specifies the predicted event probability that determines the predicted binary response level.

Default	0.5
Range	(0, 1)

"lcl":"string"

names the equal-tail lower credible limit.

"pred":"string"

names the predicted value. If you do not specify any output statistics, then the predicted value is named Pred by default.

Aliases	p
Aliases	predicted

"resid":"string"

names the residual.

Aliases	r
Aliases	residual

"role":"string"

identifies the training and test roles for observations.

"ucl":"string"

names the equal-tail upper credible limit.

outputMargins={casouttable}

For more information about specifying the outputMargins parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

outputTables={outputTables}

lists the names of results tables to save as CAS tables on the server.

For more information about specifying the outputTables parameter, see the common outputTables parameter (Appendix A: Common Parameters).

Alias	displayOut

partByFrac={partByFracStatement}

specifies the fraction of the data to be used for testing.

The partByFracStatement value can be one or more of the following:

"seed":integer

specifies the seed to use in the random number generator that is used for partitioning the data.

Default	0

"test":double

randomly assigns the specified proportion of observations in the input table to the testing role. The sum of the fractions that are specified in the test and validate parameters must be less than 1.

Range	0–1

partByVar={partByVarStatement}

names the variable and its values used to partition the data into training and testing roles.

Long form	partByVar={"name":"variable-name"}
Shortcut form	partByVar="variable-name"

The partByVarStatement value can be one or more of the following:

* "name":"variable-name"

names the variable in the input table whose values are used to assign roles to each observation.

"test":"string"

specifies the formatted value of the variable that is used to assign observations to the testing role.

"train":"string"

quantileBin=True | False

when set to True, specifies that bin boundaries are set at quantiles of numeric inputs instead of bins of equal width.

Aliases	qbin
Aliases	qtbin
Default	True

sampleSummary={bartProbit_sampleSummary}

creates a table on the server that contains a summary of the sum-of-trees ensemble samples.

The bartProbit_sampleSummary value can be one or more of the following:

"avgNode":"string"

names the variable that contains the average number of nodes per tree in the sample.

* "casout":{casouttable}

creates a table on the server that contains a summary of the sum-of-trees ensemble samples.

For more information about specifying the casout parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

"propAccepted":"string"

names the variable that contains the proportion of accepted tree modifications.

"sampSaved":"string"

names the variable that contains an indicator for whether the sample is saved for prediction.

seed=64-bit-integer

specifies a seed for starting the pseudorandom number generator.

Default	0
Range	0–4294967295

store={casouttablebasic}

stores the model in a binary table object that you can use for scoring.

For more information about specifying the store parameter, see the common casouttablebasic parameter (Appendix A: Common Parameters).

Aliases	savemodel
	save
	savestate

* table={castable}

specifies the input data table.

For more information about specifying the table parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

target="variable-name"

specifies the target variable.

trainInMem=True | False

when set to True, stores data in memory when the model is trained.

Default	False

treePrior={bart_treePrior}

specifies the regularization prior for the sum-of-trees ensemble.

The bart_treePrior value can be one or more of the following:

"depthBase":double

specifies the base probability for splitting an internal node as a function of its depth from the root. A larger base probability value makes splitting a node more likely.

Default	0.95
Range	(0, 1)

"depthPower":double

Default	2
Minimum value	0

"pPrune":double

specifies the probability of sampling the operation of pruning a pair of terminal nodes for the tree sampling algorithm. If you specify the pSplit and pPrune parameters, their values must sum to 1.

Default	0.5
Range	(0, 1)

"pSplit":double

specifies the probability of sampling the operation of splitting a terminal node for the tree sampling algorithm. If you specify the pSplit and pPrune parameters, their values must sum to 1.

Default	0.5
Range	(0, 1)

bartProbit Action

Fits probit Bayesian additive regression trees (BART) models to binary distributed response data.

R Syntax
Summary: Input and Output Tables
Parameter Descriptions

R Syntax

results <- cas.bart.bartProbit(s,

alpha=double,

applyRowOrder=TRUE | FALSE,

attributes=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

class=list( list(

descending=TRUE | FALSE,

order="FORMATTED" | "FREQ" | "FREQFORMATTED" | "FREQINTERNAL" | "INTERNAL",

ref="FIRST" | "LAST" | double | "string",

vars=list("variable-name-1" <, "variable-name-2", ...>)

) <, list(...)>),

differences=list( list(

evtMargin="string",

label="string",

name="string",

refMargin="string"

) <, list(...)>),

display=list(

caseSensitive=TRUE | FALSE,

exclude=TRUE | FALSE,

excludeAll=TRUE | FALSE,

keyIsPath=TRUE | FALSE,

names=list("string-1" <, "string-2", ...>),

pathType="LABEL" | "NAME",

traceNames=TRUE | FALSE

distributeChains=integer,

freq="variable-name",

inputs=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

leafSigmaK=double,

margins=list( list(

at=list( list(

value="string" | double,

var="string"

) <, list(...)>),

label="string",

name="string"

) <, list(...)>),

maxTrainTime=double,

minLeafSize=integer,

missing="MACBIG" | "MACSMALL" | "NONE" | "SEPARATE",

model=list(

depVars=list( list(

name="variable-name"

) <, list(...)>),

effects=list( list(

vars=list("string-1" <, "string-2", ...>)

) <, list(...)>)

nBI=integer,

nBins=integer,

nClassLevelsPrint=integer,

nMC=integer,

nominals=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

nThin=integer,

nTree=integer,

obsLeafMapInMem=TRUE | FALSE,

offset="variable-name",

orderSplit=integer,

output=list(

alpha=double,

avgOnly=TRUE | FALSE,

casOut=list(

caslib="string"

compress=TRUE | FALSE

indexVars=list("variable-name-1" <, "variable-name-2", ...>)

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where=list("string-1" <, "string-2", ...>)

copyVars="ALL" | "ALL_MODEL" | "ALL_NUMERIC" | list("variable-name-1" <, "variable-name-2", ...>),

into="string",

intoCutPt=double,

lcl="string",

pred="string",

resid="string",

role="string",

ucl="string"

outputMargins=list(

caslib="string",

compress=TRUE | FALSE,

indexVars=list("variable-name-1" <, "variable-name-2", ...>),

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where=list("string-1" <, "string-2", ...>)

outputTables=list(

groupByVarsRaw=TRUE | FALSE,

includeAll=TRUE | FALSE,

names=list("string-1" <, "string-2", ...>) | list(key-1=list(casouttable-1) <, key-2=list(casouttable-2), ...>),

repeated=TRUE | FALSE,

replace=TRUE | FALSE

partByFrac=list(

seed=integer,

test=double

partByVar=list(

name="variable-name",

test="string",

train="string"

quantileBin=TRUE | FALSE,

sampleSummary=list(

avgNode="string",

casout=list(

caslib="string",

compress=TRUE | FALSE,

indexVars=list("variable-name-1" <, "variable-name-2", ...>),

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where=list("string-1" <, "string-2", ...>)

),

propAccepted="string",

sampSaved="string"

),

seed=64-bit-integer,

store=list(

caslib="string",

indexVars=list("variable-name-1" <, "variable-name-2", ...>),

label="string",

lifetime=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

),

table=list(

caslib="string",

computedOnDemand=TRUE | FALSE,

computedVars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

computedVarsProgram="string",

dataSourceOptions=list(key-1=list(any-list-or-data-type-1) <, key-2=list(any-list-or-data-type-2), ...>),

groupBy=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

groupByMode="NOSORT" | "REDISTRIBUTE",

name="table-name",

orderBy=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

singlePass=TRUE | FALSE,

vars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

where="where-expression",

whereTable=list(

casLib="string",

name="table-name",

vars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

where="where-expression"

)

),

target="variable-name",

trainInMem=TRUE | FALSE,

treePrior=list(

depthBase=double,

depthPower=double,

pPrune=double,

pSplit=double

)

* indicates a required parameter

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables
Parameter	Subparameter	Description
* table	—	specifies the input data table.

Parameters for Creating Output Tables
Parameter	Subparameter	Description
output	* casOut	creates a table on the server that contains observationwise statistics, which are computed after the model is fit.
outputMargins	—
outputTables	names	lists the names of results tables to save as CAS tables on the server.
sampleSummary	* casout	creates a table on the server that contains a summary of the sum-of-trees ensemble samples.
store	—	stores the model in a binary table object that you can use for scoring.

Parameter Descriptions

alpha=double

specifies the significance level to use for constructing equal-tail credible limits for predictive margins.

Default	0.05
Range	(0, 1)

applyRowOrder=TRUE | FALSE

Default	FALSE

attributes=list( list(casinvardesc-1) <, list(casinvardesc-2), ...>)

changes the attributes of variables used in the action. Currently, attributes specified on the inputs and nominals parameters are ignored.

For more information about specifying the attributes parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias	attribute

class=list( list(classStatement-1) <, list(classStatement-2), ...>)

names the classification variables to use as explanatory variables in the analysis.

Alias	classVars

The classStatement value can be one or more of the following:

descending=TRUE | FALSE

when set to True, reverses the sort order that is imposed by the order parameter.

Default	FALSE

order="FORMATTED" | "FREQ" | "FREQFORMATTED" | "FREQINTERNAL" | "INTERNAL"

specifies the sort order for the levels of the classification variable. This ordering determines which parameters in the model correspond to each level in the data.

ref="FIRST" | "LAST" | double | "string"

* vars=list("variable-name-1" <, "variable-name-2", ...>)

specifies the classification variables.

Alias	name

differences=list( list(bartScoreMargin_scoreDiff-1) <, list(bartScoreMargin_scoreDiff-2), ...>)

specifies differences of predictive margins.

Alias	diffs

The bartScoreMargin_scoreDiff value can be one or more of the following:

* evtMargin="string"

specifies the event predictive margin by its name.

Alias	evtScen

label="string"

labels the difference in predictive margins in output tables.

name="string"

names the difference in predictive margins in output tables.

* refMargin="string"

specifies the reference predictive margin by its name.

Alias	refScen

display=list(displayTables)

specifies a list of results tables to send to the client for display.

For more information about specifying the display parameter, see the common displayTables parameter (Appendix A: Common Parameters).

distributeChains=integer

Minimum value	0

freq="variable-name"

names the numeric variable that contains the frequency of occurrence for each observation.

inputs=list( list(casinvardesc-1) <, list(casinvardesc-2), ...>)

specifies the input variables to use in the analysis.

For more information about specifying the inputs parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias	input

leafSigmaK=double

specifies the value used to determine the prior variance for the leaf parameter.

Default	2
Minimum value (exclusive)	0

margins=list( list(bartScoreMargin_evaluate-1) <, list(bartScoreMargin_evaluate-2), ...>)

specifies a predictive margin.

Alias	scenarios

The bartScoreMargin_evaluate value can be one or more of the following:

at=list( list(bartScoreMargin_varValue-1) <, list(bartScoreMargin_varValue-2), ...>)

specifies the variables to modify in a predictive margin and the values they are set to.

Alias	evaluate

The bartScoreMargin_varValue value can be one or more of the following:

* value="string" | double

specifies the value a variable is set to in the predictive margin. For continuous variables, a numeric value is specified. For classification variables, the formatted level is specified.

* var="string"

names a variable to modify in a predictive margin.

Alias	variable

label="string"

labels the predictive margin in output tables.

* name="string"

names the predictive margin in output tables.

maxTrainTime=double

specifies an upper limit (in seconds) on the time for MCMC sampling.

Alias	maxTime
Minimum value (exclusive)	0

minLeafSize=integer

specifies the minimum number of observations that each child of a split must contain in the training data in order for the split to be considered.

Alias	leafSize
Default	5
Minimum value	1

missing="MACBIG" | "MACSMALL" | "NONE" | "SEPARATE"

specifies how to handle missing values in predictor variables.

Default	SEPARATE

MACBIG

MACSMALL

NONE

SEPARATE

model=list(bartProbitModel)

names the dependent variable and explanatory effects.

The bartProbitModel value can be one or more of the following:

depVars=list( list(responsevar-1) <, list(responsevar-2), ...>)

specifies one or more variables to use as response variables in the model. Not all models support more than one response variable.

Aliases	depVar
Aliases	target

name="variable-name"

names the response variable.

effects=list( list(effect-1) <, list(effect-2), ...>)

* vars=list("string-1" <, "string-2", ...>)

specifies the variables to use in defining a term of the effect. You must specify at least one variable.

nBI=integer

specifies the number of burn-in iterations to perform before the action starts to save samples for prediction.

Alias	burnin
Default	100
Minimum value	1

nBins=integer

specifies the number of bins to use for binning continuous input variables.

Default	50
Minimum value	2

nClassLevelsPrint=integer

limits the display of class levels. The value 0 suppresses all levels.

Minimum value	0

nMC=integer

Default	1000
Minimum value	1

nominals=list( list(casinvardesc-1) <, list(casinvardesc-2), ...>)

specifies the nominal input variables to use in the analysis.

For more information about specifying the nominals parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Alias	nominal

nThin=integer

specifies the thinning rate of the simulation.

Alias	thin
Default	1
Minimum value	1

nTree=integer

specifies the number of trees in a sample of the sum-of-trees ensemble.

Default	200
Minimum value	1

obsLeafMapInMem=TRUE | FALSE

when set to True, stores a mapping of each observation to terminal nodes in memory when the model is trained.

Default	FALSE

offset="variable-name"

specifies a numeric offset variable. This variable cannot be a classification variable, a response variable, or one of the explanatory variables.

orderSplit=integer

specifies the minimum cardinality for which a categorical input uses splitting rules according to level ordering.

Default	50
Minimum value (exclusive)	0

output=list(bartBinOutputStatement)

creates a table on the server that contains observationwise statistics, which are computed after the model is fit.

The bartBinOutputStatement value can be one or more of the following:

alpha=double

specifies the significance level to use for the construction of all equal-tail credible limits.

Default	0.05
Range	(0, 1)

avgOnly=TRUE | FALSE

when set to False, predictions from each MCMC sample are included in the output table in addition to the sample average predictions.

Alias	averageOnly
Default	TRUE

* casOut=list(casouttable)

specifies the settings for an output table.

For more information about specifying the casOut parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

copyVars="ALL" | "ALL_MODEL" | "ALL_NUMERIC" | list("variable-name-1" <, "variable-name-2", ...>)

into="string"

names the predicted response level. The default name is Into.

intoCutPt=double

specifies the predicted event probability that determines the predicted binary response level.

Default	0.5
Range	(0, 1)

lcl="string"

names the equal-tail lower credible limit.

pred="string"

names the predicted value. If you do not specify any output statistics, then the predicted value is named Pred by default.

Aliases	p
Aliases	predicted

resid="string"

names the residual.

Aliases	r
Aliases	residual

role="string"

identifies the training and test roles for observations.

ucl="string"

names the equal-tail upper credible limit.

outputMargins=list(casouttable)

For more information about specifying the outputMargins parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

outputTables=list(outputTables)

lists the names of results tables to save as CAS tables on the server.

For more information about specifying the outputTables parameter, see the common outputTables parameter (Appendix A: Common Parameters).

Alias	displayOut

partByFrac=list(partByFracStatement)

specifies the fraction of the data to be used for testing.

The partByFracStatement value can be one or more of the following:

seed=integer

specifies the seed to use in the random number generator that is used for partitioning the data.

Default	0

test=double

randomly assigns the specified proportion of observations in the input table to the testing role. The specified fraction must be less than 1.

Range	0–1

partByVar=list(partByVarStatement)

names the variable and its values used to partition the data into training and testing roles.

Long form	partByVar=list(name="variable-name")
Shortcut form	partByVar="variable-name"

The partByVarStatement value can be one or more of the following:

* name="variable-name"

names the variable in the input table whose values are used to assign roles to each observation.

test="string"

specifies the formatted value of the variable that is used to assign observations to the testing role.

train="string"

quantileBin=TRUE | FALSE

when set to True, specifies that bin boundaries are set at quantiles of numeric inputs instead of bins of equal width.

Aliases	qbin
Aliases	qtbin
Default	TRUE

sampleSummary=list(bartProbit_sampleSummary)

creates a table on the server that contains a summary of the sum-of-trees ensemble samples.

The bartProbit_sampleSummary value can be one or more of the following:

avgNode="string"

names the variable that contains the average number of nodes per tree in the sample.

* casout=list(casouttable)

creates a table on the server that contains a summary of the sum-of-trees ensemble samples.

For more information about specifying the casout parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

propAccepted="string"

names the variable that contains the proportion of accepted tree modifications.

sampSaved="string"

names the variable that contains an indicator for whether the sample is saved for prediction.

seed=64-bit-integer

specifies a seed for starting the pseudorandom number generator.

Default	0
Range	0–4294967295

store=list(casouttablebasic)

stores the model in a binary table object that you can use for scoring.

For more information about specifying the store parameter, see the common casouttablebasic parameter (Appendix A: Common Parameters).

Aliases	savemodel
	save
	savestate

* table=list(castable)

specifies the input data table.

For more information about specifying the table parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

target="variable-name"

specifies the target variable.

trainInMem=TRUE | FALSE

when set to True, stores data in memory when the model is trained.

Default	FALSE

treePrior=list(bart_treePrior)

specifies the regularization prior for the sum-of-trees ensemble.

The bart_treePrior value can be one or more of the following:

depthBase=double

specifies the base probability for splitting an internal node as a function of its depth from the root. A larger base probability value makes splitting a node more likely.

Default	0.95
Range	(0, 1)

depthPower=double

Default	2
Minimum value	0

pPrune=double

specifies the probability of sampling the operation of pruning a pair of terminal nodes for the tree sampling algorithm. If you specify the pSplit and pPrune parameters, their values must sum to 1.

Default	0.5
Range	(0, 1)

pSplit=double

specifies the probability of sampling the operation of splitting a terminal node for the tree sampling algorithm. If you specify the pSplit and pPrune parameters, their values must sum to 1.

Default	0.5
Range	(0, 1)

Last updated: March 05, 2026