Robust PCA Action Set

mu=double,

nThreads=integer,

onRpca=TRUE | FALSE,

output={

casOut={

caslib="string"

compress=TRUE | FALSE

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

nPC=integer,

nPCUsed=integer,

pcAngles=TRUE | FALSE,

randSeed=integer,

standardPc=TRUE | FALSE,

warmupSize=integer,

winsummary={

caslib="string"

compress=TRUE | FALSE

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

}

outputTables={

groupByVarsRaw=TRUE | FALSE,

includeAll=TRUE | FALSE,

names={"string-1" <, "string-2", ...>} | {key-1={casouttable-1} <, key-2={casouttable-2}, ...>},

repeated=TRUE | FALSE,

replace=TRUE | FALSE

stepSize=integer,

svdMaxRank=integer,

table={

caslib="string",

computedOnDemand=TRUE | FALSE,

computedVars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

computedVarsProgram="string",

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},

importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters},

name="table-name",

orderBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

singlePass=TRUE | FALSE,

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

where="where-expression",

whereTable={

casLib="string"

dataSourceOptions={adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters}

name="table-name"

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}}

where="where-expression"

}

tolerance=double,

windowSize=integer

;

* indicates a required parameter

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables
Parameter	Subparameter	Description
* table	—	specifies the settings for an input table.

Parameters for Creating Output Tables
Parameter	Subparameter	Description
output	* casOut, winsummary	specifies a list of parameters for the output table.
outputTables	names	lists the names of results tables to save as CAS tables on the server.

Parameter Descriptions

attributes={{casinvardesc-1} <, {casinvardesc-2}, ...>}

changes the attributes of variables used in this action. Currently, attributes specified on the inputs and nominals parameters are ignored.

For more information about specifying the attributes parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Aliases	attribute
Aliases	attr

center=TRUE | FALSE

when set to True, centers the numeric variables by the mean of each column.

Alias	centering
Default	FALSE

cumEigPctTol=double

specifies the significance level of the eigenvalues that determine the rank of the low-rank matrix.

Default	1
Range	(0–1]

display={displayTables}

specifies a list of results tables to send to the client for display.

For more information about specifying the display parameter, see the common displayTables parameter (Appendix A: Common Parameters).

fixedMu=TRUE | FALSE

when set to True, fixes mu in each iteration of the accelerated proximal gradient method. Otherwise, mu is dynamically updated in each iteration.

Default	FALSE

* id="variable-name"

specifies the name of a numeric variable that identifies observations in the input data table. The values need to be a series of numbers that have a common difference. The action stops if this column contains a missing value.

inputs={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the numeric variables to be analyzed. If you omit this parameter, all numeric variables that are not specified in other parameters are analyzed. You cannot specify this parameter with the image parameter.

For more information about specifying the inputs parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Aliases	input
	vars
	var

lambda=double

specifies the value of the coefficient in the objective function (lambda), which is multiplied by the L1 norm of the sparse matrix in the objective function. The default value is computed as 1 divided by the square root of the number of observations or the number of variables in the input table, whichever is greater.

Range	(0–10000000000]

lambdaWeight=double

specifies the weight of lambda.

Default	1
Range	(0–10000000000]

maxIter=integer

specifies the maximum number of iterations for robust principal component analysis algorithms.

Default	1000
Minimum value	0

method="ALM" | "APG"

specifies the method to use to perform the robust principal component analysis.

Default	ALM

ALM

uses the augmented Lagrange multiplier method.

APG

uses the accelerated proximal gradient method.

mu=double

specifies an initial value of mu in the objective function for the accelerated proximal gradient method.

Default	0.001
Range	0–10000000000

nThreads=integer

specifies the maximum number of threads to use on each computation node.

Default	16
Range	0–1024

onRpca=TRUE | FALSE

when set to True, performs the robust principal component analysis method at each window.

Default	FALSE

output={mwpcaOutput}

specifies a list of parameters for the output table.

The mwpcaOutput value can be one or more of the following:

* casOut={casouttable}

specifies the name of the output table.

For more information about specifying the casOut parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

nPC=integer

specifies the number of principal components to be displayed for each window in the output table.

Alias	nPCDisplayed
Default	1
Range	1–100

nPCUsed=integer

specifies the number of principal components to be used for each window when computing the angle change in the output table.

Default	1
Range	1–100

pcAngles=TRUE | FALSE

when set to True, provides the angle change and the absolute angle of the principal components in the output table.

Default	FALSE

randSeed=integer

specifies the seed for random number generation in the warmup steps.

Default	0
Minimum value	0

standardPc=TRUE | FALSE

when set to True, standardizes the principal components in the output table.

Default	FALSE

warmupSize=integer

specifies the warmup size. This parameter is valid only when the number of principal components is greater than one. The default value is 0.

Default	0
Minimum value	0

winsummary={casouttable}

specifies the name of the output table that contains the solution summary for each window.

For more information about specifying the winsummary parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

outputTables={outputTables}

lists the names of results tables to save as CAS tables on the server.

For more information about specifying the outputTables parameter, see the common outputTables parameter (Appendix A: Common Parameters).

Alias	displayOut

* stepSize=integer

specifies the length of the step size.

Default	100
Minimum value	1

svdIncremental="FULL" | "INC"

specifies the type of singular value decomposition to use.

Alias	svdType
Default	FULL

FULL

uses the full singular value decomposition method.

INC

uses the incremental singular value decomposition method.

svdMaxRank=integer

specifies the maximum value of rank to be considered in the singular value decomposition solver. The default value is the smaller of the number of observations and the number of variables in the input table.

Minimum value	1

svdMethod="EIGEN" | "ITERATIVE"

specifies the type of the singular value decomposition solver.

Default	EIGEN

EIGEN

uses the eigenvalue decomposition method.

ITERATIVE

uses the iterative singular value decomposition method.

* table={castable}

specifies the settings for an input table.

Long form	table={name="table-name"}
Shortcut form	table="table-name"

The castable value can be one or more of the following:

caslib="string"

specifies the caslib for the input table that you want to use with the action. By default, the active caslib is used. Specify a value only if you need to access a table from a different caslib.

computedOnDemand=TRUE | FALSE

when set to True, creates the computed variables when the table is loaded instead of when the action begins.

Alias	compOnDemand
Default	FALSE

computedVars={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the names of the computed variables to create. Specify an expression for each variable in the computedVarsProgram parameter. If you do not specify this parameter, then all variables from computedVarsProgram are automatically included.

Alias	compVars

The casinvardesc value can be one or more of the following:

format="string"

specifies the format to apply to the variable.

formattedLength=integer

specifies the length of the format field plus the length of the format precision.

label="string"

specifies the descriptive label for the variable.

* name="variable-name"

specifies the name for the variable.

nfd=integer

specifies the length of the format precision.

nfl=integer

specifies the length of the format field.

computedVarsProgram="string"

specifies an expression for each computed variable that you include in the computedVars parameter.

Alias	compPgm

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>}

specifies data source options.

Aliases	options
Aliases	dataSource

importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters}

specifies the settings for reading a table from a data source.

Alias	import

For more information about specifying the importOptions parameter, see the common importOptions parameter (Appendix A: Common Parameters).

* name="table-name"

specifies the name of the input table.

orderBy={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the variables to use for ordering observations within partitions. This parameter applies to partitioned tables, or it can be combined with variables that are specified in the groupBy parameter when the value of the groupByMode parameter is set to REDISTRIBUTE.

The casinvardesc value can be one or more of the following:

format="string"

specifies the format to apply to the variable.

formattedLength=integer

specifies the length of the format field plus the length of the format precision.

label="string"

specifies the descriptive label for the variable.

* name="variable-name"

specifies the name for the variable.

nfd=integer

specifies the length of the format precision.

nfl=integer

specifies the length of the format field.

singlePass=TRUE | FALSE

when set to True, does not create a transient table on the server. Setting this parameter to True can be efficient, but the data might not have stable ordering upon repeated runs.

Default	FALSE

vars={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the variables to use in the action.

The casinvardesc value can be one or more of the following:

format="string"

specifies the format to apply to the variable.

formattedLength=integer

specifies the length of the format field plus the length of the format precision.

label="string"

specifies the descriptive label for the variable.

* name="variable-name"

specifies the name for the variable.

nfd=integer

specifies the length of the format precision.

nfl=integer

specifies the length of the format field.

where="where-expression"

specifies an expression for subsetting the input data.

whereTable={groupbytable}

specifies an input table that contains rows to use as a WHERE filter. If the vars parameter is not specified, then all the variable names that are common to the input table and the filtering table are used to find matching rows. If the where parameter for the input table and this parameter are specified, then this filtering table is applied first.

The groupbytable value can be one or more of the following:

casLib="string"

specifies the caslib for the filter table. By default, the active caslib is used.

dataSourceOptions={adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters}

specifies data source options.

Aliases	options
Aliases	dataSource

For more information about specifying the dataSourceOptions parameter, see the common dataSourceOptions parameter (Appendix A: Common Parameters).

importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters}

specifies the settings for reading a table from a data source.

Alias	import

For more information about specifying the importOptions parameter, see the common importOptions parameter (Appendix A: Common Parameters).

* name="table-name"

specifies the name of the filter table.

vars={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the variable names to use from the filter table.

The casinvardesc value can be one or more of the following:

format="string"

specifies the format to apply to the variable.

formattedLength=integer

specifies the length of the format field plus the length of the format precision.

label="string"

specifies the descriptive label for the variable.

* name="variable-name"

specifies the name for the variable.

nfd=integer

specifies the length of the format precision.

nfl=integer

specifies the length of the format field.

where="where-expression"

specifies an expression for subsetting the data from the filter table.

tolerance=double

specifies the convergence criterion for the robust principal component analysis algorithms.

Alias	stopcriterion
Default	1E-07
Minimum value	1E-10

* windowSize=integer

specifies the length of the window. This parameter is required.

Default	5000
Minimum value	2

mwpca Action

Performs principal component analysis based on a series of moving windows.

Lua Syntax
Summary: Input and Output Tables
Parameter Descriptions

Lua Syntax

results, info = s:robustPca_mwpca{

attributes={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

center=true | false,

cumEigPctTol=double,

display={

caseSensitive=true | false,

exclude=true | false,

excludeAll=true | false,

keyIsPath=true | false,

names={"string-1" <, "string-2", ...>},

pathType="LABEL" | "NAME",

traceNames=true | false

fixedMu=true | false,

id="variable-name",

inputs={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

lambda=double,

lambdaWeight=double,

maxIter=integer,

mu=double,

nThreads=integer,

onRpca=true | false,

output={

casOut={

caslib="string"

compress=true | false

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=true | false

replace=true | false

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

nPC=integer,

nPCUsed=integer,

pcAngles=true | false,

randSeed=integer,

standardPc=true | false,

warmupSize=integer,

winsummary={

caslib="string"

compress=true | false

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=true | false

replace=true | false

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

}

outputTables={

groupByVarsRaw=true | false,

includeAll=true | false,

names={"string-1" <, "string-2", ...>} | {key-1={casouttable-1} <, key-2={casouttable-2}, ...>},

repeated=true | false,

replace=true | false

stepSize=integer,

svdMaxRank=integer,

table={

caslib="string",

computedOnDemand=true | false,

computedVars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

computedVarsProgram="string",

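dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},

importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters},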

name="table-name",

orderBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

singlePass=true | false,

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

where="where-expression",

whereTable={

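casLib="string"

dataSourceOptions={adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters}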

name="table-name"

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}}

where="where-expression"

}

tolerance=double,

windowSize=integer

}

* indicates a required parameter

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables
Parameter	Subparameter	Description
* table	—	specifies the settings for an input table.

Parameters for Creating Output Tables
Parameter	Subparameter	Description
output	* casOut, winsummary	specifies a list of parameters for the output table.
outputTables	names	lists the names of results tables to save as CAS tables on the server.

Parameter Descriptions

attributes={{casinvardesc-1} <, {casinvardesc-2}, ...>}

changes the attributes of variables used in this action. Currently, attributes specified on the inputs and nominals parameters are ignored.

For more information about specifying the attributes parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Aliases	attribute
Aliases	attr

center=true | false

when set to True, centers the numeric variables by the mean of each column.

Alias	centering
Default	false

cumEigPctTol=double

specifies the significance level of the eigenvalues that determine the rank of the low-rank matrix.

Default	1
Range	(0–1]

display={displayTables}

specifies a list of results tables to send to the client for display.

For more information about specifying the display parameter, see the common displayTables parameter (Appendix A: Common Parameters).

fixedMu=true | false

when set to True, fixes mu in each iteration of the accelerated proximal gradient method. Otherwise, mu is dynamically updated in each iteration.

Default	false

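* id="variable-name"

specifies the name of a numeric variable that identifies observations in the input data table. The values need to be a series of numbers that have a common difference. The action stops if this column contains a missing value.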

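inputs={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the numeric variables to be analyzed. If you omit this parameter, all numeric variables that are not specified in other parameters are analyzed. You cannot specify this parameter with the image parameter.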

For more information about specifying the inputs parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Aliases	input
	vars
	var

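lambda=double

specifies the value of the coefficient in the objective function (lambda), which is multiplied by the L1 norm of the sparse matrix in the objective function. The default value is computed as 1 divided by the square root of the number of observations or the number of variables in the input table, whichever is greater.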

Range	(0–10000000000]

lambdaWeight=double

specifies the weight of lambda.

Default	1
Range	(0–10000000000]

maxIter=integer

specifies the maximum number of iterations for robust principal component analysis algorithms.

Default	1000
Minimum value	0

method="ALM" | "APG"

specifies the method to use to perform the robust principal component analysis.

Default	ALM

ALM

uses the augmented Lagrange multiplier method.

APG

uses the accelerated proximal gradient method.

mu=double

specifies an initial value of mu in the objective function for the accelerated proximal gradient method.

Default	0.001
Range	0–10000000000

nThreads=integer

specifies the maximum number of threads to use on each computation node.

Default	16
Range	0–1024

onRpca=true | false

when set to True, performs the robust principal component analysis method at each window.

Default	false

output={mwpcaOutput}

specifies a list of parameters for the output table.

The mwpcaOutput value can be one or more of the following:

* casOut={casouttable}

specifies the name of the output table.

For more information about specifying the casOut parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

nPC=integer

specifies the number of principal components to be displayed for each window in the output table.

Alias	nPCDisplayed
Default	1
Range	1–100

nPCUsed=integer

specifies the number of principal components to be used for each window when computing the angle change in the output table.

Default	1
Range	1–100

pcAngles=true | false

when set to True, provides the angle change and the absolute angle of the principal components in the output table.

Default	false

randSeed=integer

specifies the seed for random number generation in the warmup steps.

Default	0
Minimum value	0

standardPc=true | false

when set to True, standardizes the principal components in the output table.

Default	false

warmupSize=integer

specifies the warmup size. This parameter is valid only when the number of principal components is greater than one. The default value is 0.

Default	0
Minimum value	0

winsummary={casouttable}

specifies the name of the output table that contains the solution summary for each window.

For more information about specifying the winsummary parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

outputTables={outputTables}

lists the names of results tables to save as CAS tables on the server.

For more information about specifying the outputTables parameter, see the common outputTables parameter (Appendix A: Common Parameters).

Alias	displayOut

* stepSize=integer

specifies the length of the step size.

Default	100
Minimum value	1

svdIncremental="FULL" | "INC"

specifies the type of singular value decomposition to use.

Alias	svdType
Default	FULL

FULL

uses the full singular value decomposition method.

INC

uses the incremental singular value decomposition method.

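svdMaxRank=integer

specifies the maximum value of rank to be considered in the singular value decomposition solver. The default value is the smaller of the number of observations and the number of variables in the input table.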

Minimum value	1

svdMethod="EIGEN" | "ITERATIVE"

specifies the type of the singular value decomposition solver.

Default	EIGEN

EIGEN

uses the eigenvalue decomposition method.

ITERATIVE

uses the iterative singular value decomposition method.

* table={castable}

specifies the settings for an input table.

Long form	table={name="table-name"}
Shortcut form	table="table-name"

The castable value can be one or more of the following:

caslib="string"

specifies the caslib for the input table that you want to use with the action. By default, the active caslib is used. Specify a value only if you need to access a table from a different caslib.

computedOnDemand=true | false

when set to True, creates the computed variables when the table is loaded instead of when the action begins.

Alias	compOnDemand
Default	false

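computedVars={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the names of the computed variables to create. Specify an expression for each variable in the computedVarsProgram parameter. If you do not specify this parameter, then all variables from computedVarsProgram are automatically included.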

Alias	compVars

The casinvardesc value can be one or more of the following:

format="string"

specifies the format to apply to the variable.

formattedLength=integer

specifies the length of the format field plus the length of the format precision.

label="string"

specifies the descriptive label for the variable.

* name="variable-name"

specifies the name for the variable.

nfd=integer

specifies the length of the format precision.

nfl=integer

specifies the length of the format field.

computedVarsProgram="string"

specifies an expression for each computed variable that you include in the computedVars parameter.

Alias	compPgm

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>}

specifies data source options.

Aliases	options
Aliases	dataSource

importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters}

specifies the settings for reading a table from a data source.

Alias	import

For more information about specifying the importOptions parameter, see the common importOptions parameter (Appendix A: Common Parameters).

* name="table-name"

specifies the name of the input table.

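orderBy={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the variables to use for ordering observations within partitions. This parameter applies to partitioned tables, or it can be combined with variables that are specified in the groupBy parameter when the value of the groupByMode parameter is set to REDISTRIBUTE.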

The casinvardesc value can be one or more of the following:

format="string"

specifies the format to apply to the variable.

formattedLength=integer

specifies the length of the format field plus the length of the format precision.

label="string"

specifies the descriptive label for the variable.

* name="variable-name"

specifies the name for the variable.

nfd=integer

specifies the length of the format precision.

nfl=integer

specifies the length of the format field.

singlePass=true | false

when set to True, does not create a transient table on the server. Setting this parameter to True can be efficient, but the data might not have stable ordering upon repeated runs.

Default	false

vars={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the variables to use in the action.

The casinvardesc value can be one or more of the following:

format="string"

specifies the format to apply to the variable.

formattedLength=integer

specifies the length of the format field plus the length of the format precision.

label="string"

specifies the descriptive label for the variable.

* name="variable-name"

specifies the name for the variable.

nfd=integer

specifies the length of the format precision.

nfl=integer

specifies the length of the format field.

where="where-expression"

specifies an expression for subsetting the input data.

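whereTable={groupbytable}

specifies an input table that contains rows to use as a WHERE filter. If the vars parameter is not specified, then all the variable names that are common to the input table and the filtering table are used to find matching rows. If the where parameter for the input table and this parameter are specified, then this filtering table is applied first.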

The groupbytable value can be one or more of the following:

casLib="string"

specifies the caslib for the filter table. By default, the active caslib is used.

dataSourceOptions={adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters}

specifies data source options.

Aliases	options
Aliases	dataSource

For more information about specifying the dataSourceOptions parameter, see the common dataSourceOptions parameter (Appendix A: Common Parameters).

importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters}

specifies the settings for reading a table from a data source.

Alias	import

For more information about specifying the importOptions parameter, see the common importOptions parameter (Appendix A: Common Parameters).

* name="table-name"

specifies the name of the filter table.

vars={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the variable names to use from the filter table.

The casinvardesc value can be one or more of the following:

format="string"

specifies the format to apply to the variable.

formattedLength=integer

specifies the length of the format field plus the length of the format precision.

label="string"

specifies the descriptive label for the variable.

* name="variable-name"

specifies the name for the variable.

nfd=integer

specifies the length of the format precision.

nfl=integer

specifies the length of the format field.

where="where-expression"

specifies an expression for subsetting the data from the filter table.

tolerance=double

specifies the convergence criterion for the robust principal component analysis algorithms.

Alias	stopcriterion
Default	1E-07
Minimum value	1E-10

* windowSize=integer

specifies the length of the window. This parameter is required.

Default	5000
Minimum value	2

mwpca Action

Performs principal component analysis based on a series of moving windows.

Python Syntax
Summary: Input and Output Tables
Parameter Descriptions

Python Syntax

results=s.robustPca.mwpca(

attributes=[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

center=True | False,

cumEigPctTol=double,

display={

"caseSensitive":True | False,

"exclude":True | False,

"excludeAll":True | False,

"keyIsPath":True | False,

"names":["string-1" <, "string-2", ...>],

"pathType":"LABEL" | "NAME",

"traceNames":True | False

fixedMu=True | False,

id="variable-name",

inputs=[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

lambda_=double,

lambdaWeight=double,

maxIter=integer,

mu=double,

nThreads=integer,

onRpca=True | False,

output={

"casOut":{

"caslib":"string"

"compress":True | False

"indexVars":["variable-name-1" <, "variable-name-2", ...>]

"label":"string"

"lifetime":64-bit-integer

"maxMemSize":64-bit-integer

"memoryFormat":"DVR" | "INHERIT" | "STANDARD"

"name":"table-name"

"promote":True | False

"replace":True | False

"replication":integer

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE"

"threadBlockSize":64-bit-integer

"timeStamp":"string"

"where":["string-1" <, "string-2", ...>]

"nPC":integer,

"nPCUsed":integer,

"pcAngles":True | False,

"randSeed":integer,

"standardPc":True | False,

"warmupSize":integer,

"winsummary":{

"caslib":"string"

"compress":True | False

"indexVars":["variable-name-1" <, "variable-name-2", ...>]

"label":"string"

"lifetime":64-bit-integer

"maxMemSize":64-bit-integer

"memoryFormat":"DVR" | "INHERIT" | "STANDARD"

"name":"table-name"

"promote":True | False

"replace":True | False

"replication":integer

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE"

"threadBlockSize":64-bit-integer

"timeStamp":"string"

"where":["string-1" <, "string-2", ...>]

}

},

outputTables={

"groupByVarsRaw":True | False,

"includeAll":True | False,

"names":["string-1" <, "string-2", ...>] | {"key-1":{casouttable-1} <, "key-2":{casouttable-2}, ...>},

"repeated":True | False,

"replace":True | False

},

stepSize=integer,

svdMaxRank=integer,

table={

"caslib":"string",

"computedOnDemand":True | False,

"computedVars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"computedVarsProgram":"string",

"dataSourceOptions":{"key-1":{any-list-or-data-type-1} <, "key-2":{any-list-or-data-type-2}, ...>},

"importOptions":{"fileType":"ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters},

"name":"table-name",

"orderBy":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"singlePass":True | False,

"vars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"where":"where-expression",

"whereTable":{

"casLib":"string"

"dataSourceOptions":{adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters}

"name":"table-name"

"vars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>]

"where":"where-expression"

}

},

tolerance=double,

windowSize=integer

)

* indicates a required parameter

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables
Parameter	Subparameter	Description
* table	—	specifies the settings for an input table.

Parameters for Creating Output Tables
Parameter	Subparameter	Description
output	* casOut, winsummary	specifies a list of parameters for the output table.
outputTables	names	lists the names of results tables to save as CAS tables on the server.

Parameter Descriptions

attributes=[{casinvardesc-1} <, {casinvardesc-2}, ...>]

changes the attributes of variables used in this action. Currently, attributes specified on the inputs and nominals parameters are ignored.

For more information about specifying the attributes parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Aliases	attribute
	attr

center=True | False

when set to True, centers the numeric variables by the mean of each column.

Alias	centering
Default	False

cumEigPctTol=double

specifies the significance level of the eigenvalues that determine the rank of the low-rank matrix.

Default	1
Range	(0–1]

display={displayTables}

specifies a list of results tables to send to the client for display.

For more information about specifying the display parameter, see the common displayTables parameter (Appendix A: Common Parameters).

fixedMu=True | False

when set to True, fixes mu in each iteration of the accelerated proximal gradient method. Otherwise, mu is dynamically updated in each iteration.

Default	False

* id="variable-name"

inputs=[{casinvardesc-1} <, {casinvardesc-2}, ...>]

For more information about specifying the inputs parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Aliases	input
	vars
	var

lambda_=double

Range	(0–10000000000]

lambdaWeight=double

specifies the weight of lambda.

Default	1
Range	(0–10000000000]

maxIter=integer

specifies the maximum number of iterations for robust principal component analysis algorithms.

Default	1000
Minimum value	0

method="ALM" | "APG"

specifies the method to use to perform the robust principal component analysis.

Default	ALM

ALM

uses the augmented Lagrange multiplier method.

APG

uses the accelerated proximal gradient method.

mu=double

specifies an initial value of mu in the objective function for the accelerated proximal gradient method.

Default	0.001
Range	0–10000000000

nThreads=integer

specifies the maximum number of threads to use on each computation node.

Default	16
Range	0–1024

onRpca=True | False

when set to True, performs the robust principal component analysis method at each window.

Default	False

output={mwpcaOutput}

specifies a list of parameters for the output table.

The mwpcaOutput value can be one or more of the following:

* "casOut":{casouttable}

specifies the name of the output table.

For more information about specifying the casOut parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

"nPC":integer

specifies the number of principal components to be displayed for each window in the output table.

Alias	nPCDisplayed
Default	1
Range	1–100

"nPCUsed":integer

specifies the number of principal components to use in each window when computing the angle change in the output table.

Default	1
Range	1–100

"pcAngles":True | False

when set to True, provides the angle change and the absolute angle of the principal components in the output table.

Default	False

"randSeed":integer

specifies the seed for the random number generator that is used in the warmup steps.

Default	0
Minimum value	0

"standardPc":True | False

when set to True, standardizes the principal components in the output table.

Default	False

"warmupSize":integer

specifies the warmup size. This parameter is valid only when the number of principal components is greater than one. The default value is 0.

Default	0
Minimum value	0

"winsummary":{casouttable}

specifies the name of the output table that contains the solution summary for each window.

For more information about specifying the winsummary parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

outputTables={outputTables}

lists the names of results tables to save as CAS tables on the server.

For more information about specifying the outputTables parameter, see the common outputTables parameter (Appendix A: Common Parameters).

Alias	displayOut

* stepSize=integer

specifies the length of the step size.

Default	100
Minimum value	1

svdIncremental="FULL" | "INC"

specifies the type of singular value decomposition to use.

Alias	svdType
Default	FULL

FULL

uses the full singular value decomposition method.

INC

uses the incremental singular value decomposition method.

svdMaxRank=integer

Minimum value	1

svdMethod="EIGEN" | "ITERATIVE"

specifies the type of the singular value decomposition solver.

Default	EIGEN

EIGEN

uses the eigenvalue decomposition method.

ITERATIVE

uses the iterative singular value decomposition method.

* table={castable}

specifies the settings for an input table.

Long form	table={"name":"table-name"}
Shortcut form	table="table-name"

The castable value can be one or more of the following:

"caslib":"string"

specifies the caslib for the input table that you want to use with the action. By default, the active caslib is used. Specify a value only if you need to access a table from a different caslib.

"computedOnDemand":True | False

when set to True, creates the computed variables when the table is loaded instead of when the action begins.

Alias	compOnDemand
Default	False

"computedVars":[{casinvardesc-1} <, {casinvardesc-2}, ...>]

Alias	compVars

The casinvardesc value can be one or more of the following:

"format":"string"

specifies the format to apply to the variable.

"formattedLength":integer

specifies the length of the format field plus the length of the format precision.

"label":"string"

specifies the descriptive label for the variable.

* "name":"variable-name"

specifies the name for the variable.

"nfd":integer

specifies the length of the format precision.

"nfl":integer

specifies the length of the format field.

"computedVarsProgram":"string"

specifies an expression for each computed variable that you include in the computedVars parameter.

Alias	compPgm

"dataSourceOptions":{"key-1":{any-list-or-data-type-1} <, "key-2":{any-list-or-data-type-2}, ...>}

specifies data source options.

Aliases	options
	dataSource

"importOptions":{"fileType":"ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters}

specifies the settings for reading a table from a data source.

Alias	import_

For more information about specifying the importOptions parameter, see the common importOptions parameter (Appendix A: Common Parameters).

* "name":"table-name"

specifies the name of the input table.

"orderBy":[{casinvardesc-1} <, {casinvardesc-2}, ...>]

The casinvardesc value can be one or more of the following:

"format":"string"

specifies the format to apply to the variable.

"formattedLength":integer

specifies the length of the format field plus the length of the format precision.

"label":"string"

specifies the descriptive label for the variable.

* "name":"variable-name"

specifies the name for the variable.

"nfd":integer

specifies the length of the format precision.

"nfl":integer

specifies the length of the format field.

"singlePass":True | False

when set to True, does not create a transient table on the server. Setting this parameter to True can be efficient, but the data might not have stable ordering upon repeated runs.

Default	False

"vars":[{casinvardesc-1} <, {casinvardesc-2}, ...>]

specifies the variables to use in the action.

The casinvardesc value can be one or more of the following:

"format":"string"

specifies the format to apply to the variable.

"formattedLength":integer

specifies the length of the format field plus the length of the format precision.

"label":"string"

specifies the descriptive label for the variable.

* "name":"variable-name"

specifies the name for the variable.

"nfd":integer

specifies the length of the format precision.

"nfl":integer

specifies the length of the format field.

"where":"where-expression"

specifies an expression for subsetting the input data.

"whereTable":{groupbytable}

The groupbytable value can be one or more of the following:

"casLib":"string"

specifies the caslib for the filter table. By default, the active caslib is used.

"dataSourceOptions":{adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters}

specifies data source options.

Aliases	options
	dataSource

For more information about specifying the dataSourceOptions parameter, see the common dataSourceOptions parameter (Appendix A: Common Parameters).

"importOptions":{"fileType":"ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters}

specifies the settings for reading a table from a data source.

Alias	import_

For more information about specifying the importOptions parameter, see the common importOptions parameter (Appendix A: Common Parameters).

* "name":"table-name"

specifies the name of the filter table.

"vars":[{casinvardesc-1} <, {casinvardesc-2}, ...>]

specifies the variable names to use from the filter table.

The casinvardesc value can be one or more of the following:

"format":"string"

specifies the format to apply to the variable.

"formattedLength":integer

specifies the length of the format field plus the length of the format precision.

"label":"string"

specifies the descriptive label for the variable.

* "name":"variable-name"

specifies the name for the variable.

"nfd":integer

specifies the length of the format precision.

"nfl":integer

specifies the length of the format field.

"where":"where-expression"

specifies an expression for subsetting the data from the filter table.

tolerance=double

specifies the convergence criterion for the robust principal component analysis algorithms.

Alias	stopcriterion
Default	1E-07
Minimum value	1E-10

* windowSize=integer

specifies the length of the window. This parameter is required.

Default	5000
Minimum value	2

mwpca Action

Performs principal component analysis based on a series of moving windows.

R Syntax
Summary: Input and Output Tables
Parameter Descriptions

R Syntax

results <- cas.robustPca.mwpca(s,

attributes=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

center=TRUE | FALSE,

cumEigPctTol=double,

display=list(

caseSensitive=TRUE | FALSE,

exclude=TRUE | FALSE,

excludeAll=TRUE | FALSE,

keyIsPath=TRUE | FALSE,

names=list("string-1" <, "string-2", ...>),

pathType="LABEL" | "NAME",

traceNames=TRUE | FALSE

),

fixedMu=TRUE | FALSE,

id="variable-name",

inputs=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

lambda=double,

lambdaWeight=double,

maxIter=integer,

mu=double,

nThreads=integer,

onRpca=TRUE | FALSE,

output=list(

casOut=list(

caslib="string"

compress=TRUE | FALSE

indexVars=list("variable-name-1" <, "variable-name-2", ...>)

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where=list("string-1" <, "string-2", ...>)

),

nPC=integer,

nPCUsed=integer,

pcAngles=TRUE | FALSE,

randSeed=integer,

standardPc=TRUE | FALSE,

warmupSize=integer,

winsummary=list(

caslib="string"

compress=TRUE | FALSE

indexVars=list("variable-name-1" <, "variable-name-2", ...>)

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where=list("string-1" <, "string-2", ...>)

)

),

outputTables=list(

groupByVarsRaw=TRUE | FALSE,

includeAll=TRUE | FALSE,

names=list("string-1" <, "string-2", ...>) | list(key-1=list(casouttable-1) <, key-2=list(casouttable-2), ...>),

repeated=TRUE | FALSE,

replace=TRUE | FALSE

),

stepSize=integer,

svdMaxRank=integer,