Robust PCA Action Set

display={

caseSensitive=TRUE | FALSE,

exclude=TRUE | FALSE,

excludeAll=TRUE | FALSE,

keyIsPath=TRUE | FALSE,

names={"string-1" <, "string-2", ...>},

pathType="LABEL" | "NAME",

traceNames=TRUE | FALSE

fixedMu=TRUE | FALSE,

freq="variable-name",

icaMaxiter=integer,

id={"variable-name-1" <, "variable-name-2", ...>},

image="variable-name",

inputs={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

lambda=double,

lambdaWeight=double,

maxIter=integer,

mu=double,

nThreads=integer,

numSigVars=integer,

outMat={

errMat={

caslib="string"

compress=TRUE | FALSE

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

lowRankMat={

caslib="string"

compress=TRUE | FALSE

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

sparseMat={

caslib="string"

compress=TRUE | FALSE

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

}

outPca={

pcLoadings={

caslib="string"

compress=TRUE | FALSE

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

pcScores={

caslib="string"

compress=TRUE | FALSE

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

}

outputTables={

groupByVarsRaw=TRUE | FALSE,

includeAll=TRUE | FALSE,

names={"string-1" <, "string-2", ...>} | {key-1={casouttable-1} <, key-2={casouttable-2}, ...>},

repeated=TRUE | FALSE,

replace=TRUE | FALSE

outSvd={

svdDiag={

caslib="string"

compress=TRUE | FALSE

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

svdLeft={

caslib="string"

compress=TRUE | FALSE

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

svdRight={

caslib="string"

compress=TRUE | FALSE

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

}

pcPrefix="string",

saveState={

caslib="string",

compress=TRUE | FALSE,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

scale=TRUE | FALSE,

sigmaCoef=double,

svdMaxRank=integer,

svdRand={

power=integer,

randSeed=integer

table={

caslib="string",

computedOnDemand=TRUE | FALSE,

computedVars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

computedVarsProgram="string",

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},

importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters},

name="table-name",

orderBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

singlePass=TRUE | FALSE,

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

where="where-expression",

whereTable={

casLib="string"

dataSourceOptions={adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters}

name="table-name"

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}}

where="where-expression"

}

tolerance=double,

useMatrix=TRUE | FALSE

;

* indicates a required parameter

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables
Parameter	Subparameter	Description
* table	—	specifies the settings for an input table.

Parameters for Creating Output Tables
Parameter	Subparameter	Description
code	casOut	produces SAS score code. This parameter is disabled if you specify the image parameter.
colStatistics	—	specifies the name of the output table to contain simple statistics for the variables of the input data set. This parameter is disabled if you specify the image parameter.
outMat	lowRankMat, sparseMat, errMat	specifies a list of parameters for the output tables of the robust principal component analysis method.
outPca	pcLoadings, pcScores	specifies a list of parameters for the output tables of the principal component analysis.
outSvd	svdDiag, svdLeft, svdRight	specifies a list of parameters for the output tables of the singular value decomposition. This parameter is disabled if you specify the image parameter.
outputTables	names	lists the names of results tables to save as CAS tables on the server.
saveState	—	specifies the output data table in which to save the scoring results to be used in the score action of the aStore action set. You can specify the RPCA_PROJECTION_TYPE subparameter in the options parameter in the score action: the value 0 projects the scoring observations onto the principal component space; the value 1 projects the scoring observations onto the low-rank subspace; the value 2 projects the scoring observations onto the low-rank subspace, but the sparse part of the scoring data is stored in the scoring results table. The value 0 is not available if you generate the table by using the image parameter.

Parameter Descriptions

anomalyDetection=TRUE | FALSE

when set to True, uses a subsequent score action for anomaly detection.

Aliases	anomaly
	AD
Default	FALSE

anomalyDetectionMethod=integer

specifies the method of anomaly detection. If this value is set to 0, the SIGVARS method for anomaly detection is used. If this value is set to 1, the R4S method for anomaly detection is used. If this value is set to 2, the ICA-SIGVARS method for anomaly detection is used. If this value is set to 3, the ICA-NORMS method for anomaly detection is used. For more information about these anomaly detection methods, see the Details section. You can override this parameter by specifying the following values in the respective subparameters of the options parameter in the score action: specify RPCA_ANOMALYDETECTION_METHOD as the value of the name subparameter, and specify the override value in the value subparameter.

Alias	ADMethod
Default	0
Range	0–3

attributes={{casinvardesc-1} <, {casinvardesc-2}, ...>}

changes the attributes of variables used in this action. Currently, attributes specified on the inputs and nominals parameters are ignored.

For more information about specifying the attributes parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Aliases	attribute
	attr

center=TRUE | FALSE

when set to True, centers the numeric variables by the mean of each column.

Alias	centering
Default	FALSE

code={rpcaCodegen}

produces SAS score code. This parameter is disabled if you specify the image parameter.

The rpcaCodegen value can be one or more of the following:

casOut={casouttable}

specifies the settings for an output table.

The casouttable value can be one or more of the following:

caslib="string"

specifies the name of the caslib for the output table.

compress=TRUE | FALSE

when set to True, applies data compression to the table.

Default	FALSE

indexVars={"variable-name-1" <, "variable-name-2", ...>}

specifies the list of variables to create indexes for in the output data.

label="string"

specifies the descriptive label to associate with the table.

lifetime=64-bit-integer

specifies the number of seconds to keep the table in memory after it is last accessed. The table is dropped if it is not accessed for the specified number of seconds.

Default	0
Minimum value	0

maxMemSize=64-bit-integer

specifies the maximum amount of memory, in bytes, that each thread should allocate for in-memory blocks before converting to a memory-mapped file. Files are written in the directories that are specified in the CAS_DISK_CACHE environment variable.

TIP	You can enclose the value in quotation marks and specify B, K, M, G, or T as a suffix to indicate the units. For example, "8M" specifies eight megabytes.

memoryFormat="DVR" | "INHERIT" | "STANDARD"

specifies the memory format for the output table.

Default	INHERIT

DVR

use the duplicate value reduction memory format. This memory format can reduce the memory consumption and file size when the input data contains duplicate values.

INHERIT

use the default memory format that is set for the server. By default, the server uses the standard memory format. If an administrator sets the CAS_DEFAULT_MEMORY_FORMAT environment variable to DVR, then the DVR memory format is set as the default for the server.

STANDARD

use the standard memory format.

name="table-name"

specifies the name for the output table.

promote=TRUE | FALSE

when set to True, adds the output table with a global scope. This enables other sessions to access the table, subject to access controls. The target caslib must also have a global scope.

Default	FALSE

replace=TRUE | FALSE

when set to True, overwrites an existing table that has the same name.

Default	FALSE

replication=integer

specifies the number of copies of the table to make for fault tolerance. Larger values result in slower performance and use more memory, but provide high availability for data in the event of a node failure. Data redundancy applies to distributed servers only.

Default	1
Minimum value	0

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

Specifies the Table Redistribution Policy when the number of worker pods increases on a running CAS server.

DEFER

Defer redistribution policy selection to higher-level entity.

NOREDIST

Do not redistribute table data when the number of worker pods changes on a running CAS server.

REBALANCE

Rebalance table data when the number of worker pods changes on a running CAS server.

threadBlockSize=64-bit-integer

specifies the number of bytes to use for blocks in the output table. The blocks are read by threads. Gradually increase this value when you have a large table with millions or billions of rows and you are tuning for performance. Larger values can increase performance with indexed tables. However, if the value is too large, then you can cause thread starvation due to too few blocks for threads to work on.

Alias	blockSize
Default	1048576
Minimum value	0
TIP	You can enclose the value in quotation marks and specify B, K, M, G, or T as a suffix to indicate the units. For example, "8M" specifies eight megabytes.

timeStamp="string"

specifies to add a timestamp column to the table. Support for timeStamp is action-specific. Specify the value in the form that is appropriate for your session locale.

where={"string-1" <, "string-2", ...>}

specifies one or more expressions for subsetting the output data. When multiple expressions are specified, the expressions are effectively combined using AND to form the final output filter. If an expression contains quoted values, use nested quotation marks.

comment=TRUE | FALSE

when set to True, adds comments to the DATA step code.

Default	FALSE

fmtWdth=integer

specifies the width to use for formatting derived numbers such as parameter estimates in the DATA step code.

Alias	fmtWidth
Default	20
Range	0–32

indentSize=integer

specifies the number of spaces to indent the DATA step code for each level.

Default	3
Range	0–10

labelId=integer

specifies the label ID to use in array names and statement labels in the DATA step code. By default, a random positive integer is used.

lineSize=integer

specifies the line size for the generated code.

Default	120
Range	64–254

noTrim=TRUE | FALSE

when set to True, bases the comparison of variables with formatted values on the full format width with padding. By default, leading and trailing blanks are removed from the formatted values.

Default	FALSE

projectionType="LRS" | "PCA"

specifies the type of scoring.

Default	PCA

LRS

projects the scoring observations onto the low-rank space.

PCA

projects the scoring observations onto the principal components.

tabForm=TRUE | FALSE

when set to True, generates the code in a way that is appropriate for storing in a table.

Alias	tableForm
Default	FALSE

colStatistics={casouttable}

specifies the name of the output table to contain simple statistics for the variables of the input data set. This parameter is disabled if you specify the image parameter.

For more information about specifying the colStatistics parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

cumEigPctTol=double

specifies the significance level of the eigenvalues that determine the rank of the low-rank matrix.

Default	1
Range	(0–1]

decomp="NONE" | "PCA" | "SVD"

specifies the decomposition method for the low-rank matrix. If the value of the maxIter parameter is 0, decomposition is applied to the original input data instead of to the low-rank matrix.

Default	NONE

NONE

performs neither principal component analysis nor singular value decomposition.

PCA

performs principal component analysis.

SVD

performs singular value decomposition.

display={displayTables}

specifies a list of results tables to send to the client for display.

For more information about specifying the display parameter, see the common displayTables parameter (Appendix A: Common Parameters).

fixedMu=TRUE | FALSE

when set to True, fixes mu in each iteration of the accelerated proximal gradient method. Otherwise, mu is dynamically updated in each iteration.

Default	FALSE

freq="variable-name"

specifies a numeric variable that contains the frequency of occurrence of each observation.

icaMaxiter=integer

specifies the maximum number of iterations of Infomax ICA when training.

Default	100
Range	1–500

icaMethod="FOBI" | "GEOD" | "SYMM" | "UNCON"

specifies the ICA method for RPCA-ICA anomaly detection.

Default	FOBI

FOBI

performs the Fourth Order Blind Identification method for ICA.

GEOD

performs the Geodesic Infomax method for ICA.

SYMM

performs the Symmetric Orthogonalized Infomax method for ICA.

UNCON

performs the unconstrained Infomax method for ICA.

id={"variable-name-1" <, "variable-name-2", ...>}

specifies the variables to use as record identifiers.

image="variable-name"

specifies the name of the column that contains image binaries, encoded as JPG, PNG, TIF, or WIDE. You cannot specify this parameter with the inputs parameter.

Alias	imageVar

inputs={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the numeric variables to be analyzed. If you omit this parameter, all numeric variables that are not specified in other parameters are analyzed. You cannot specify this parameter with the image parameter.

For more information about specifying the inputs parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Aliases	input
	vars
	var

lambda=double

specifies the value of the coefficient in the objective function (lambda), which is multiplied by the L1 norm of the sparse matrix in the objective function. The default value is computed as 1 divided by the square root of the number of observations or the number of variables in the input table, whichever is greater.

Range	(0–10000000000]

lambdaWeight=double

specifies the weight of lambda.

Default	1
Range	(0–10000000000]

maxIter=integer

specifies the maximum number of iterations for robust principal component analysis algorithms.

Default	1000
Minimum value	0

method="ALM" | "APG"

specifies the method to use to perform the robust principal component analysis.

Default	ALM

ALM

uses the augmented Lagrange multiplier method.

APG

uses the accelerated proximal gradient method.

mu=double

specifies an initial value of mu in the objective function for the accelerated proximal gradient method.

Default	0.001
Range	0–10000000000

nThreads=integer

specifies the maximum number of threads to use on each computation node.

Default	16
Range	0–1024

numSigVars=integer

specifies the minimum number of significant variables in an observation for it to be considered as an anomaly by the SIGVARS and ICA-SIGVARS methods. You can override this parameter by specifying the following values in the respective subparameters of the options parameter in the score action: specify RPCA_NUMSIGVARS as the value of the name subparameter, and specify the override value in the value subparameter.

Default	1
Minimum value	1

outMat={outRpcaTabs}

specifies a list of parameters for the output tables of the robust principal component analysis method.

The outRpcaTabs value can be one or more of the following:

errMat={casouttable}

specifies the name of the output table for the error matrix.

For more information about specifying the errMat parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

Alias	outError

lowRankMat={casouttable}

specifies the name of the output table for the low-rank matrix.

For more information about specifying the lowRankMat parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

Alias	outLowRank

sparseMat={casouttable}

specifies the name of the output table for the sparse matrix.

For more information about specifying the sparseMat parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

Alias	outSparse

outPca={outPcaTabs}

specifies a list of parameters for the output tables of the principal component analysis.

The outPcaTabs value can be one or more of the following:

pcLoadings={casouttable}

specifies the name of the output table for the principal component loadings.

For more information about specifying the pcLoadings parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

pcScores={casouttable}

specifies the name of the output table for the principal component scores.

For more information about specifying the pcScores parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

outputTables={outputTables}

lists the names of results tables to save as CAS tables on the server.

For more information about specifying the outputTables parameter, see the common outputTables parameter (Appendix A: Common Parameters).

Alias	displayOut

outSvd={outSvdTabs}

specifies a list of parameters for the output tables of the singular value decomposition. This parameter is disabled if you specify the image parameter.

The outSvdTabs value can be one or more of the following:

svdDiag={casouttable}

specifies the name of the output table for the diagonal vector of the rectangular diagonal matrix.

For more information about specifying the svdDiag parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

svdLeft={casouttable}

specifies the name of the output table for the left-singular vectors.

For more information about specifying the svdLeft parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

svdRight={casouttable}

specifies the name of the output table for the right-singular vectors.

For more information about specifying the svdRight parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

pcPrefix="string"

specifies a prefix for naming the principal components.

Default	"Prin"

saveState={casouttable}

specifies the output data table in which to save the scoring results to be used in the score action of the aStore action set. You can specify the RPCA_PROJECTION_TYPE subparameter in the options parameter in the score action: the value 0 projects the scoring observations onto the principal component space; the value 1 projects the scoring observations onto the low-rank subspace; the value 2 projects the scoring observations onto the low-rank subspace, but the sparse part of the scoring data is stored in the scoring results table. The value 0 is not available if you generate the table by using the image parameter.

For more information about specifying the saveState parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

scale=TRUE | FALSE

when set to True, scales the numeric variables by the standard deviation of each column.

Alias	scaling
Default	FALSE

sigmaCoef=double

specifies the threshold on the standardized sparse value in the SIGVARS method for anomaly detection or a coefficient that is applied to the threshold in the R4S method. You can override this parameter by specifying the following values in the respective subparameters of the options parameter in the score action: specify RPCA_SIGMACOEF as the value of the name subparameter, and specify the override value in the value subparameter.

Default	1
Minimum value	1E-10

svdMaxRank=integer

specifies the maximum value of rank to be considered in the singular value decomposition solver. The default value is the smaller of the number of observations and the number of variables in the input table.

Minimum value	1

svdMethod="EIGEN" | "ITERATIVE" | "RANDOM"

specifies the type of the singular value decomposition solver.

Default	EIGEN

EIGEN

uses the eigenvalue decomposition method.

ITERATIVE

uses the iterative singular value decomposition method.

RANDOM

uses the randomized singular value decomposition method.

svdRand={randomizedSvd}

specifies a list of parameters to use when the value of the svdMethod parameter is RANDOM.

The randomizedSvd value can be one or more of the following:

power=integer

specifies the parameter power.

Default	0
Minimum value	0

randSeed=integer

specifies the seed value.

Default	0
Minimum value	1

* table={castable}

specifies the settings for an input table.

Long form	table={name="table-name"}
Shortcut form	table="table-name"

The castable value can be one or more of the following:

caslib="string"

specifies the caslib for the input table that you want to use with the action. By default, the active caslib is used. Specify a value only if you need to access a table from a different caslib.

computedOnDemand=TRUE | FALSE

when set to True, creates the computed variables when the table is loaded instead of when the action begins.

Alias	compOnDemand
Default	FALSE

computedVars={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the names of the computed variables to create. Specify an expression for each variable in the computedVarsProgram parameter. If you do not specify this parameter, then all variables from computedVarsProgram are automatically included.

Alias	compVars

The casinvardesc value can be one or more of the following:

format="string"

specifies the format to apply to the variable.

formattedLength=integer

specifies the length of the format field plus the length of the format precision.

label="string"

specifies the descriptive label for the variable.

* name="variable-name"

specifies the name for the variable.

nfd=integer

specifies the length of the format precision.

nfl=integer

specifies the length of the format field.

computedVarsProgram="string"

specifies an expression for each computed variable that you include in the computedVars parameter.

Alias	compPgm

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>}

specifies data source options.

Aliases	options
	dataSource

importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters}

specifies the settings for reading a table from a data source.

Alias	import

For more information about specifying the importOptions parameter, see the common importOptions parameter (Appendix A: Common Parameters).

* name="table-name"

specifies the name of the input table.

orderBy={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the variables to use for ordering observations within partitions. This parameter applies to partitioned tables, or it can be combined with variables that are specified in the groupBy parameter when the value of the groupByMode parameter is set to REDISTRIBUTE.

The casinvardesc value can be one or more of the following:

format="string"

specifies the format to apply to the variable.

formattedLength=integer

specifies the length of the format field plus the length of the format precision.

label="string"

specifies the descriptive label for the variable.

* name="variable-name"

specifies the name for the variable.

nfd=integer

specifies the length of the format precision.

nfl=integer

specifies the length of the format field.

singlePass=TRUE | FALSE

when set to True, does not create a transient table on the server. Setting this parameter to True can be efficient, but the data might not have stable ordering upon repeated runs.

Default	FALSE

vars={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the variables to use in the action.

The casinvardesc value can be one or more of the following:

format="string"

specifies the format to apply to the variable.

formattedLength=integer

specifies the length of the format field plus the length of the format precision.

label="string"

specifies the descriptive label for the variable.

* name="variable-name"

specifies the name for the variable.

nfd=integer

specifies the length of the format precision.

nfl=integer

specifies the length of the format field.

where="where-expression"

specifies an expression for subsetting the input data.

whereTable={groupbytable}

specifies an input table that contains rows to use as a WHERE filter. If the vars parameter is not specified, then all the variable names that are common to the input table and the filtering table are used to find matching rows. If the where parameter for the input table and this parameter are specified, then this filtering table is applied first.

The groupbytable value can be one or more of the following:

casLib="string"

specifies the caslib for the filter table. By default, the active caslib is used.

dataSourceOptions={adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters}

specifies data source options.

Aliases	options
	dataSource

For more information about specifying the dataSourceOptions parameter, see the common dataSourceOptions parameter (Appendix A: Common Parameters).

importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters}

specifies the settings for reading a table from a data source.

Alias	import

For more information about specifying the importOptions parameter, see the common importOptions parameter (Appendix A: Common Parameters).

* name="table-name"

specifies the name of the filter table.

vars={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the variable names to use from the filter table.

The casinvardesc value can be one or more of the following:

format="string"

specifies the format to apply to the variable.

formattedLength=integer

specifies the length of the format field plus the length of the format precision.

label="string"

specifies the descriptive label for the variable.

* name="variable-name"

specifies the name for the variable.

nfd=integer

specifies the length of the format precision.

nfl=integer

specifies the length of the format field.

where="where-expression"

specifies an expression for subsetting the data from the filter table.

tolerance=double

specifies the convergence criterion for the robust principal component analysis algorithms.

Alias	stopcriterion
Default	1E-07
Minimum value	1E-10

useMatrix=TRUE | FALSE

when set to True, uses the standard deviation of the columns of the sparse matrix to standardize the sparse part of the scoring observation in the anomaly detection methods SIGVARS and R4S. When set to False, the action uses the standard deviation of the columns of the original input data for that purpose. You can override this parameter by specifying the following values in the respective subparameters of the options parameter in the score action: specify RPCA_USEMATRIX as the value of the name subparameter, and specify the override value in the value subparameter.

Default	FALSE

robustpca Action

Performs robust principal component analysis.

Lua Syntax
Summary: Input and Output Tables
Parameter Descriptions

Lua Syntax

results, info = s:robustPca_robustpca{

anomalyDetection=true | false,

anomalyDetectionMethod=integer,

attributes={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

center=true | false,

code={

casOut={

caslib="string"

compress=true | false

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=true | false

replace=true | false

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

comment=true | false,

fmtWdth=integer,

indentSize=integer,

labelId=integer,

lineSize=integer,

noTrim=true | false,

projectionType="LRS" | "PCA",

tabForm=true | false

colStatistics={

caslib="string",

compress=true | false,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=true | false,

replace=true | false,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

cumEigPctTol=double,

display={

caseSensitive=true | false,

exclude=true | false,

excludeAll=true | false,

keyIsPath=true | false,

names={"string-1" <, "string-2", ...>},

pathType="LABEL" | "NAME",

traceNames=true | false

fixedMu=true | false,

freq="variable-name",

icaMaxiter=integer,

id={"variable-name-1" <, "variable-name-2", ...>},

image="variable-name",

inputs={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

lambda=double,

lambdaWeight=double,

maxIter=integer,

mu=double,

nThreads=integer,

numSigVars=integer,

outMat={

errMat={

caslib="string"

compress=true | false

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=true | false

replace=true | false

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

lowRankMat={

caslib="string"

compress=true | false

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=true | false

replace=true | false

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

sparseMat={

caslib="string"

compress=true | false

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=true | false

replace=true | false

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

}

outPca={

pcLoadings={

caslib="string"

compress=true | false

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=true | false

replace=true | false

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

pcScores={

caslib="string"

compress=true | false

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=true | false

replace=true | false

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

}

outputTables={

groupByVarsRaw=true | false,

includeAll=true | false,

names={"string-1" <, "string-2", ...>} | {key-1={casouttable-1} <, key-2={casouttable-2}, ...>},

repeated=true | false,

replace=true | false

outSvd={

svdDiag={

caslib="string"

compress=true | false

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=true | false

replace=true | false

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

svdLeft={

caslib="string"

compress=true | false

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=true | false

replace=true | false

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

svdRight={

caslib="string"

compress=true | false

indexVars={"variable-name-1" <, "variable-name-2", ...>}

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=true | false

replace=true | false

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where={"string-1" <, "string-2", ...>}

}

pcPrefix="string",

saveState={

caslib="string",

compress=true | false,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=true | false,

replace=true | false,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

scale=true | false,

sigmaCoef=double,

svdMaxRank=integer,

svdRand={

power=integer,

randSeed=integer

table={

caslib="string",

computedOnDemand=true | false,

computedVars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

computedVarsProgram="string",

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},

name="table-name",

orderBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

singlePass=true | false,

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

where="where-expression",

whereTable={

casLib="string"

name="table-name"

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}}

where="where-expression"

}

tolerance=double,

useMatrix=true | false

}

* indicates a required parameter

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables
Parameter	Subparameter	Description
* table	—	specifies the settings for an input table.

Parameters for Creating Output Tables
Parameter	Subparameter	Description
code	casOut	produces SAS score code. This parameter is disabled if you specify the image parameter.
colStatistics	—	specifies the name of the output table to contain simple statistics for the variables of the input data set. This parameter is disabled if you specify the image parameter.
outMat	lowRankMat, sparseMat, errMat	specifies a list of parameters for the output tables of the robust principal component analysis method.
outPca	pcLoadings, pcScores	specifies a list of parameters for the output tables of the principal component analysis.
outSvd	svdDiag, svdLeft, svdRight	specifies a list of parameters for the output tables of the singular value decomposition. This parameter is disabled if you specify the image parameter.
outputTables	names	lists the names of results tables to save as CAS tables on the server.
saveState	—	specifies the output data table in which to save the scoring results to be used in the score action of the aStore action set. You can specify the RPCA_PROJECTION_TYPE subparameter in the options parameter in the score action: the value 0 projects the scoring observations onto the principal component space; the value 1 projects the scoring observations onto the low-rank subspace; the value 2 projects the scoring observations onto the low-rank subspace, but the sparse part of the scoring data is stored in the scoring results table. The value 0 is not available if you generate the table by using the image parameter.

Parameter Descriptions

anomalyDetection=true | false

when set to True, uses a subsequent score action for anomaly detection.

Aliases	anomaly
Aliases	AD
Default	false

anomalyDetectionMethod=integer

Alias	ADMethod
Default	0
Range	0–3

attributes={{casinvardesc-1} <, {casinvardesc-2}, ...>}

changes the attributes of variables used in this action. Currently, attributes specified on the inputs and nominals parameters are ignored.

For more information about specifying the attributes parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Aliases	attribute
Aliases	attr

center=true | false

when set to True, centers the numeric variables by the mean of each column.

Alias	centering
Default	false

code={rpcaCodegen}

produces SAS score code. This parameter is disabled if you specify the image parameter.

The rpcaCodegen value can be one or more of the following:

casOut={casouttable}

specifies the settings for an output table.

The casouttable value can be one or more of the following:

caslib="string"

specifies the name of the caslib for the output table.

compress=true | false

when set to True, applies data compression to the table.

Default	false

indexVars={"variable-name-1" <, "variable-name-2", ...>}

specifies the list of variables to create indexes for in the output data.

label="string"

specifies the descriptive label to associate with the table.

lifetime=64-bit-integer

specifies the number of seconds to keep the table in memory after it is last accessed. The table is dropped if it is not accessed for the specified number of seconds.

Default	0
Minimum value	0

maxMemSize=64-bit-integer

TIP	You can enclose the value in quotation marks and specify B, K, M, G, or T as a suffix to indicate the units. For example, "8M" specifies eight megabytes.

memoryFormat="DVR" | "INHERIT" | "STANDARD"

specifies the memory format for the output table.

Default	INHERIT

DVR

use the duplicate value reduction memory format. This memory format can reduce the memory consumption and file size when the input data contains duplicate values.

INHERIT

STANDARD

use the standard memory format.

name="table-name"

specifies the name for the output table.

promote=true | false

when set to True, adds the output table with a global scope. This enables other sessions to access the table, subject to access controls. The target caslib must also have a global scope.

Default	false

replace=true | false

when set to True, overwrites an existing table that has the same name.

Default	false

replication=integer

Default	1
Minimum value	0

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

specifies the table redistribution policy to apply when the number of worker pods increases on a running CAS server.

DEFER

Defer redistribution policy selection to higher-level entity.

NOREDIST

Do not redistribute table data when the number of worker pods changes on a running CAS server.

REBALANCE

Rebalance table data when the number of worker pods changes on a running CAS server.

threadBlockSize=64-bit-integer

Alias	blockSize
Default	1048576
Minimum value	0
TIP	You can enclose the value in quotation marks and specify B, K, M, G, or T as a suffix to indicate the units. For example, "8M" specifies eight megabytes.

timeStamp="string"

specifies that a timestamp column be added to the table. Support for timeStamp is action-specific. Specify the value in the form that is appropriate for your session locale.

where={"string-1" <, "string-2", ...>}

comment=true | false

when set to True, adds comments to the DATA step code.

Default	false

fmtWdth=integer

specifies the width to use for formatting derived numbers such as parameter estimates in the DATA step code.

Alias	fmtWidth
Default	20
Range	0–32

indentSize=integer

specifies the number of spaces to indent the DATA step code for each level.

Default	3
Range	0–10

labelId=integer

specifies the label ID to use in array names and statement labels in the DATA step code. By default, a random positive integer is used.

lineSize=integer

specifies the line size for the generated code.

Default	120
Range	64–254

noTrim=true | false

when set to True, bases the comparison of variables with formatted values on the full format width with padding. By default, leading and trailing blanks are removed from the formatted values.

Default	false

projectionType="LRS" | "PCA"

specifies the type of scoring.

Default	PCA

LRS

projects the scoring observations onto the low-rank space.

PCA

projects the scoring observations onto the principal components.

tabForm=true | false

when set to True, generates the code in a way that is appropriate for storing in a table.

Alias	tableForm
Default	false

colStatistics={casouttable}

specifies the name of the output table to contain simple statistics for the variables of the input data set. This parameter is disabled if you specify the image parameter.

For more information about specifying the colStatistics parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

cumEigPctTol=double

specifies the significance level of the eigenvalues that determine the rank of the low-rank matrix.

Default	1
Range	(0–1]

decomp="NONE" | "PCA" | "SVD"

specifies the decomposition method for the low-rank matrix. If the value of the maxIter parameter is 0, decomposition is applied to the original input data instead of to the low-rank matrix.

Default	NONE

NONE

performs neither principal component analysis nor singular value decomposition.

PCA

performs principal component analysis.

SVD

performs singular value decomposition.

display={displayTables}

specifies a list of results tables to send to the client for display.

For more information about specifying the display parameter, see the common displayTables parameter (Appendix A: Common Parameters).

fixedMu=true | false

when set to True, fixes mu in each iteration of the accelerated proximal gradient method. Otherwise, mu is dynamically updated in each iteration.

Default	false

freq="variable-name"

specifies a numeric variable that contains the frequency of occurrence of each observation.

icaMaxiter=integer

specifies the maximum number of iterations of Infomax ICA when training.

Default	100
Range	1–500

icaMethod="FOBI" | "GEOD" | "SYMM" | "UNCON"

specifies the ICA method for RPCA-ICA anomaly detection.

Default	FOBI

FOBI

performs the Fourth Order Blind Identification method for ICA.

GEOD

performs the Geodesic Infomax method for ICA.

SYMM

performs the Symmetric Orthogonalized Infomax method for ICA.

UNCON

performs the unconstrained Infomax method for ICA.

id={"variable-name-1" <, "variable-name-2", ...>}

specifies the variables to use as record identifiers.

image="variable-name"

specifies the name of the column that contains image binaries, encoded as JPG, PNG, TIF, or WIDE. You cannot specify this parameter with the inputs parameter.

Alias	imageVar

inputs={{casinvardesc-1} <, {casinvardesc-2}, ...>}

For more information about specifying the inputs parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Aliases	input
	vars
	var

lambda=double

Range	(0–10000000000]

lambdaWeight=double

specifies the weight of lambda.

Default	1
Range	(0–10000000000]

maxIter=integer

specifies the maximum number of iterations for robust principal component analysis algorithms.

Default	1000
Minimum value	0

method="ALM" | "APG"

specifies the method to use to perform the robust principal component analysis.

Default	ALM

ALM

uses the augmented Lagrange multiplier method.

APG

uses the accelerated proximal gradient method.

mu=double

specifies an initial value of mu in the objective function for the accelerated proximal gradient method.

Default	0.001
Range	0–10000000000

nThreads=integer

specifies the maximum number of threads to use on each computation node.

Default	16
Range	0–1024

numSigVars=integer

Default	1
Minimum value	1

outMat={outRpcaTabs}

specifies a list of parameters for the output tables of the robust principal component analysis method.

The outRpcaTabs value can be one or more of the following:

errMat={casouttable}

specifies the name of the output table for the error matrix.

For more information about specifying the errMat parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

Alias	outError

lowRankMat={casouttable}

specifies the name of the output table for the low-rank matrix.

For more information about specifying the lowRankMat parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

Alias	outLowRank

sparseMat={casouttable}

specifies the name of the output table for the sparse matrix.

For more information about specifying the sparseMat parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

Alias	outSparse

outPca={outPcaTabs}

specifies a list of parameters for the output tables of the principal component analysis.

The outPcaTabs value can be one or more of the following:

pcLoadings={casouttable}

specifies the name of the output table for the principal component loadings.

For more information about specifying the pcLoadings parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

pcScores={casouttable}

specifies the name of the output table for the principal component scores.

For more information about specifying the pcScores parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

outputTables={outputTables}

lists the names of results tables to save as CAS tables on the server.

For more information about specifying the outputTables parameter, see the common outputTables parameter (Appendix A: Common Parameters).

Alias	displayOut

outSvd={outSvdTabs}

specifies a list of parameters for the output tables of the singular value decomposition. This parameter is disabled if you specify the image parameter.

The outSvdTabs value can be one or more of the following:

svdDiag={casouttable}

specifies the name of the output table for the diagonal vector of the rectangular diagonal matrix.

For more information about specifying the svdDiag parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

svdLeft={casouttable}

specifies the name of the output table for the left-singular vectors.

For more information about specifying the svdLeft parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

svdRight={casouttable}

specifies the name of the output table for the right-singular vectors.

For more information about specifying the svdRight parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

pcPrefix="string"

specifies a prefix for naming the principal components.

Default	"Prin"

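saveState={casouttable}

specifies the output data table in which to save the scoring results to be used in the score action of the aStore action set. You can specify the RPCA_PROJECTION_TYPE subparameter in the options parameter in the score action: the value 0 projects the scoring observations onto the principal component space; the value 1 projects the scoring observations onto the low-rank subspace; the value 2 projects the scoring observations onto the low-rank subspace, but the sparse part of the scoring data is stored in the scoring results table. The value 0 is not available if you generate the table by using the image parameter.

For more information about specifying the saveState parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).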

scale=true | false

when set to True, scales the numeric variables by the standard deviation of each column.

Alias	scaling
Default	false

sigmaCoef=double

Default	1
Minimum value	1E-10

svdMaxRank=integer

Minimum value	1

svdMethod="EIGEN" | "ITERATIVE" | "RANDOM"

specifies the type of the singular value decomposition solver.

Default	EIGEN

EIGEN

uses the eigenvalue decomposition method.

ITERATIVE

uses the iterative singular value decomposition method.

RANDOM

uses the randomized singular value decomposition method.

svdRand={randomizedSvd}

specifies a list of parameters to use when the value of the svdMethod parameter is RANDOM.

The randomizedSvd value can be one or more of the following:

power=integer

specifies the parameter power.

Default	0
Minimum value	0

randSeed=integer

specifies the seed value.

Default	0
Minimum value	1

* table={castable}

specifies the settings for an input table.

Long form	table={name="table-name"}
Shortcut form	table="table-name"

The castable value can be one or more of the following:

caslib="string"

specifies the caslib for the input table that you want to use with the action. By default, the active caslib is used. Specify a value only if you need to access a table from a different caslib.

computedOnDemand=true | false

when set to True, creates the computed variables when the table is loaded instead of when the action begins.

Alias	compOnDemand
Default	false

computedVars={{casinvardesc-1} <, {casinvardesc-2}, ...>}

Alias	compVars

The casinvardesc value can be one or more of the following:

format="string"

specifies the format to apply to the variable.

formattedLength=integer

specifies the length of the format field plus the length of the format precision.

label="string"

specifies the descriptive label for the variable.

* name="variable-name"

specifies the name for the variable.

nfd=integer

specifies the length of the format precision.

nfl=integer

specifies the length of the format field.

computedVarsProgram="string"

specifies an expression for each computed variable that you include in the computedVars parameter.

Alias	compPgm

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>}

specifies data source options.

Aliases	options
Aliases	dataSource

importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters}

specifies the settings for reading a table from a data source.

Alias	import

For more information about specifying the importOptions parameter, see the common importOptions parameter (Appendix A: Common Parameters).

* name="table-name"

specifies the name of the input table.

orderBy={{casinvardesc-1} <, {casinvardesc-2}, ...>}

The casinvardesc value can be one or more of the following:

format="string"

specifies the format to apply to the variable.

formattedLength=integer

specifies the length of the format field plus the length of the format precision.

label="string"

specifies the descriptive label for the variable.

* name="variable-name"

specifies the name for the variable.

nfd=integer

specifies the length of the format precision.

nfl=integer

specifies the length of the format field.

singlePass=true | false

when set to True, does not create a transient table on the server. Setting this parameter to True can be efficient, but the data might not have stable ordering upon repeated runs.

Default	false

vars={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the variables to use in the action.

The casinvardesc value can be one or more of the following:

format="string"

specifies the format to apply to the variable.

formattedLength=integer

specifies the length of the format field plus the length of the format precision.

label="string"

specifies the descriptive label for the variable.

* name="variable-name"

specifies the name for the variable.

nfd=integer

specifies the length of the format precision.

nfl=integer

specifies the length of the format field.

where="where-expression"

specifies an expression for subsetting the input data.

whereTable={groupbytable}

The groupbytable value can be one or more of the following:

casLib="string"

specifies the caslib for the filter table. By default, the active caslib is used.

dataSourceOptions={adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters}

specifies data source options.

Aliases	options
Aliases	dataSource

For more information about specifying the dataSourceOptions parameter, see the common dataSourceOptions parameter (Appendix A: Common Parameters).

importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters}

specifies the settings for reading a table from a data source.

Alias	import

For more information about specifying the importOptions parameter, see the common importOptions parameter (Appendix A: Common Parameters).

* name="table-name"

specifies the name of the filter table.

vars={{casinvardesc-1} <, {casinvardesc-2}, ...>}

specifies the variable names to use from the filter table.

The casinvardesc value can be one or more of the following:

format="string"

specifies the format to apply to the variable.

formattedLength=integer

specifies the length of the format field plus the length of the format precision.

label="string"

specifies the descriptive label for the variable.

* name="variable-name"

specifies the name for the variable.

nfd=integer

specifies the length of the format precision.

nfl=integer

specifies the length of the format field.

where="where-expression"

specifies an expression for subsetting the data from the filter table.

tolerance=double

specifies the convergence criterion for the robust principal component analysis algorithms.

Alias	stopcriterion
Default	1E-07
Minimum value	1E-10

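useMatrix=true | false

when set to True, uses the standard deviation of the columns of the sparse matrix to standardize the sparse part of the scoring observation in the anomaly detection methods SIGVARS and R4S. When set to False, the action uses the standard deviation of the columns of the original input data for that purpose. You can override this parameter by specifying the following values in the respective subparameters of the options parameter in the score action: specify RPCA_USEMATRIX as the value of the name subparameter, and specify the override value in the value subparameter.

Default	false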

robustpca Action

Performs robust principal component analysis.

Python Syntax
Summary: Input and Output Tables
Parameter Descriptions

Python Syntax

results=s.robustPca.robustpca(

anomalyDetection=True | False,

anomalyDetectionMethod=integer,

attributes=[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

center=True | False,

code={

"casOut":{

"caslib":"string"

"compress":True | False

"indexVars":["variable-name-1" <, "variable-name-2", ...>]

"label":"string"

"lifetime":64-bit-integer

"maxMemSize":64-bit-integer

"memoryFormat":"DVR" | "INHERIT" | "STANDARD"

"name":"table-name"

"promote":True | False

"replace":True | False

"replication":integer

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE"

"threadBlockSize":64-bit-integer

"timeStamp":"string"

"where":["string-1" <, "string-2", ...>]

"comment":True | False,

"fmtWdth":integer,

"indentSize":integer,

"labelId":integer,

"lineSize":integer,

"noTrim":True | False,

"projectionType":"LRS" | "PCA",

"tabForm":True | False

colStatistics={

"caslib":"string",

"compress":True | False,

"indexVars":["variable-name-1" <, "variable-name-2", ...>],

"label":"string",

"lifetime":64-bit-integer,

"maxMemSize":64-bit-integer,

"memoryFormat":"DVR" | "INHERIT" | "STANDARD",

"name":"table-name",

"promote":True | False,

"replace":True | False,

"replication":integer,

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",

"threadBlockSize":64-bit-integer,

"timeStamp":"string",

"where":["string-1" <, "string-2", ...>]

cumEigPctTol=double,

display={

"caseSensitive":True | False,

"exclude":True | False,

"excludeAll":True | False,

"keyIsPath":True | False,

"names":["string-1" <, "string-2", ...>],

"pathType":"LABEL" | "NAME",

"traceNames":True | False

fixedMu=True | False,

freq="variable-name",

icaMaxiter=integer,

id=["variable-name-1" <, "variable-name-2", ...>],

image="variable-name",

inputs=[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

lambda_=double,

lambdaWeight=double,

maxIter=integer,

mu=double,

nThreads=integer,

numSigVars=integer,

outMat={

"errMat":{

"caslib":"string"

"compress":True | False

"indexVars":["variable-name-1" <, "variable-name-2", ...>]

"label":"string"

"lifetime":64-bit-integer

"maxMemSize":64-bit-integer

"memoryFormat":"DVR" | "INHERIT" | "STANDARD"

"name":"table-name"

"promote":True | False

"replace":True | False

"replication":integer

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE"

"threadBlockSize":64-bit-integer

"timeStamp":"string"

"where":["string-1" <, "string-2", ...>]

"lowRankMat":{

"caslib":"string"

"compress":True | False

"indexVars":["variable-name-1" <, "variable-name-2", ...>]

"label":"string"

"lifetime":64-bit-integer

"maxMemSize":64-bit-integer

"memoryFormat":"DVR" | "INHERIT" | "STANDARD"

"name":"table-name"

"promote":True | False

"replace":True | False

"replication":integer

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE"

"threadBlockSize":64-bit-integer

"timeStamp":"string"

"where":["string-1" <, "string-2", ...>]

"sparseMat":{

"caslib":"string"

"compress":True | False

"indexVars":["variable-name-1" <, "variable-name-2", ...>]

"label":"string"

"lifetime":64-bit-integer

"maxMemSize":64-bit-integer

"memoryFormat":"DVR" | "INHERIT" | "STANDARD"

"name":"table-name"

"promote":True | False

"replace":True | False

"replication":integer

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE"

"threadBlockSize":64-bit-integer

"timeStamp":"string"

"where":["string-1" <, "string-2", ...>]

}

outPca={

"pcLoadings":{

"caslib":"string"

"compress":True | False

"indexVars":["variable-name-1" <, "variable-name-2", ...>]

"label":"string"

"lifetime":64-bit-integer

"maxMemSize":64-bit-integer

"memoryFormat":"DVR" | "INHERIT" | "STANDARD"

"name":"table-name"

"promote":True | False

"replace":True | False

"replication":integer

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE"

"threadBlockSize":64-bit-integer

"timeStamp":"string"

"where":["string-1" <, "string-2", ...>]

"pcScores":{

"caslib":"string"

"compress":True | False

"indexVars":["variable-name-1" <, "variable-name-2", ...>]

"label":"string"

"lifetime":64-bit-integer

"maxMemSize":64-bit-integer

"memoryFormat":"DVR" | "INHERIT" | "STANDARD"

"name":"table-name"

"promote":True | False

"replace":True | False

"replication":integer

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE"

"threadBlockSize":64-bit-integer

"timeStamp":"string"

"where":["string-1" <, "string-2", ...>]

}

outputTables={

"groupByVarsRaw":True | False,

"includeAll":True | False,

"names":["string-1" <, "string-2", ...>] | {"key-1":{casouttable-1} <, "key-2":{casouttable-2}, ...>},

"repeated":True | False,

"replace":True | False

outSvd={

"svdDiag":{

"caslib":"string"

"compress":True | False

"indexVars":["variable-name-1" <, "variable-name-2", ...>]

"label":"string"

"lifetime":64-bit-integer

"maxMemSize":64-bit-integer

"memoryFormat":"DVR" | "INHERIT" | "STANDARD"

"name":"table-name"

"promote":True | False

"replace":True | False

"replication":integer

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE"

"threadBlockSize":64-bit-integer

"timeStamp":"string"

"where":["string-1" <, "string-2", ...>]

"svdLeft":{

"caslib":"string"

"compress":True | False

"indexVars":["variable-name-1" <, "variable-name-2", ...>]

"label":"string"

"lifetime":64-bit-integer

"maxMemSize":64-bit-integer

"memoryFormat":"DVR" | "INHERIT" | "STANDARD"

"name":"table-name"

"promote":True | False

"replace":True | False

"replication":integer

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE"

"threadBlockSize":64-bit-integer

"timeStamp":"string"

"where":["string-1" <, "string-2", ...>]

"svdRight":{

"caslib":"string"

"compress":True | False

"indexVars":["variable-name-1" <, "variable-name-2", ...>]

"label":"string"

"lifetime":64-bit-integer

"maxMemSize":64-bit-integer

"memoryFormat":"DVR" | "INHERIT" | "STANDARD"

"name":"table-name"

"promote":True | False

"replace":True | False

"replication":integer

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE"

"threadBlockSize":64-bit-integer

"timeStamp":"string"

"where":["string-1" <, "string-2", ...>]

}

pcPrefix="string",

saveState={

"caslib":"string",

"compress":True | False,

"indexVars":["variable-name-1" <, "variable-name-2", ...>],

"label":"string",

"lifetime":64-bit-integer,

"maxMemSize":64-bit-integer,

"memoryFormat":"DVR" | "INHERIT" | "STANDARD",

"name":"table-name",

"promote":True | False,

"replace":True | False,

"replication":integer,

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",

"threadBlockSize":64-bit-integer,

"timeStamp":"string",

"where":["string-1" <, "string-2", ...>]

scale=True | False,

sigmaCoef=double,

svdMaxRank=integer,

svdRand={

"power":integer,

"randSeed":integer

table={

"caslib":"string",

"computedOnDemand":True | False,

"computedVars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"computedVarsProgram":"string",

"dataSourceOptions":{"key-1":{any-list-or-data-type-1} <, "key-2":{any-list-or-data-type-2}, ...>},

"importOptions":{"fileType":"ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters},

"name":"table-name",

"orderBy":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"singlePass":True | False,

"vars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"where":"where-expression",

"whereTable":{

"casLib":"string"

"dataSourceOptions":{adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters}

"name":"table-name"

"vars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>]

"where":"where-expression"

}

tolerance=double,

useMatrix=True | False

)

* indicates a required parameter

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables
Parameter	Subparameter	Description
* table	—	specifies the settings for an input table.

Parameters for Creating Output Tables
Parameter	Subparameter	Description
code	casOut	produces SAS score code. This parameter is disabled if you specify the image parameter.
colStatistics	—	specifies the name of the output table to contain simple statistics for the variables of the input data set. This parameter is disabled if you specify the image parameter.
outMat	lowRankMat, sparseMat, errMat	specifies a list of parameters for the output tables of the robust principal component analysis method.
outPca	pcLoadings, pcScores	specifies a list of parameters for the output tables of the principal component analysis.
outSvd	svdDiag, svdLeft, svdRight	specifies a list of parameters for the output tables of the singular value decomposition. This parameter is disabled if you specify the image parameter.
outputTables	names	lists the names of results tables to save as CAS tables on the server.
saveState	—	specifies the output data table in which to save the scoring results to be used in the score action of the aStore action set. You can specify the RPCA_PROJECTION_TYPE subparameter in the options parameter in the score action: the value 0 projects the scoring observations onto the principal component space; the value 1 projects the scoring observations onto the low-rank subspace; the value 2 projects the scoring observations onto the low-rank subspace, but the sparse part of the scoring data is stored in the scoring results table. The value 0 is not available if you generate the table by using the image parameter.

Parameter Descriptions

anomalyDetection=True | False

when set to True, uses a subsequent score action for anomaly detection.

Aliases	anomaly
Aliases	AD
Default	False

anomalyDetectionMethod=integer

Alias	ADMethod
Default	0
Range	0–3

attributes=[{casinvardesc-1} <, {casinvardesc-2}, ...>]

changes the attributes of variables used in this action. Currently, attributes specified on the inputs and nominals parameters are ignored.

For more information about specifying the attributes parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Aliases	attribute
Aliases	attr

center=True | False

when set to True, centers the numeric variables by the mean of each column.

Alias	centering
Default	False

code={rpcaCodegen}

produces SAS score code. This parameter is disabled if you specify the image parameter.

The rpcaCodegen value can be one or more of the following:

"casOut":{casouttable}

specifies the settings for an output table.

The casouttable value can be one or more of the following:

"caslib":"string"

specifies the name of the caslib for the output table.

"compress":True | False

when set to True, applies data compression to the table.

Default	False

"indexVars":["variable-name-1" <, "variable-name-2", ...>]

specifies the list of variables to create indexes for in the output data.

"label":"string"

specifies the descriptive label to associate with the table.

"lifetime":64-bit-integer

specifies the number of seconds to keep the table in memory after it is last accessed. The table is dropped if it is not accessed for the specified number of seconds.

Default	0
Minimum value	0

"maxMemSize":64-bit-integer

TIP	You can enclose the value in quotation marks and specify B, K, M, G, or T as a suffix to indicate the units. For example, "8M" specifies eight megabytes.

"memoryFormat":"DVR" | "INHERIT" | "STANDARD"

specifies the memory format for the output table.

Default	INHERIT

DVR

use the duplicate value reduction memory format. This memory format can reduce the memory consumption and file size when the input data contains duplicate values.

INHERIT

STANDARD

use the standard memory format.

"name":"table-name"

specifies the name for the output table.

"promote":True | False

when set to True, adds the output table with a global scope. This enables other sessions to access the table, subject to access controls. The target caslib must also have a global scope.

Default	False

"replace":True | False

when set to True, overwrites an existing table that has the same name.

Default	False

"replication":integer

Default	1
Minimum value	0

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE"

specifies the table redistribution policy to apply when the number of worker pods increases on a running CAS server.

DEFER

defers the redistribution policy selection to a higher-level entity.

NOREDIST

Do not redistribute table data when the number of worker pods changes on a running CAS server.

REBALANCE

Rebalance table data when the number of worker pods changes on a running CAS server.

"threadBlockSize":64-bit-integer

Alias	blockSize
Default	1048576
Minimum value	0
TIP	You can enclose the value in quotation marks and specify B, K, M, G, or T as a suffix to indicate the units. For example, "8M" specifies eight megabytes.

"timeStamp":"string"

specifies to add a timestamp column to the table. Support for timeStamp is action-specific. Specify the value in the form that is appropriate for your session locale.

"where":["string-1" <, "string-2", ...>]

"comment":True | False

when set to True, adds comments to the DATA step code.

Default	False

"fmtWdth":integer

specifies the width to use for formatting derived numbers such as parameter estimates in the DATA step code.

Alias	fmtWidth
Default	20
Range	0–32

"indentSize":integer

specifies the number of spaces to indent the DATA step code for each level.

Default	3
Range	0–10

"labelId":integer

specifies the label ID to use in array names and statement labels in the DATA step code. By default, a random positive integer is used.

"lineSize":integer

specifies the line size for the generated code.

Default	120
Range	64–254

"noTrim":True | False

when set to True, bases the comparison of variables with formatted values on the full format width with padding. By default, leading and trailing blanks are removed from the formatted values.

Default	False

"projectionType":"LRS" | "PCA"

specifies the type of scoring.

Default	PCA

LRS

projects the scoring observations onto the low-rank space.

PCA

projects the scoring observations onto the principal components.

"tabForm":True | False

when set to True, generates the code in a way that is appropriate for storing in a table.

Alias	tableForm
Default	False

colStatistics={casouttable}

specifies the name of the output table to contain simple statistics for the variables of the input data set. This parameter is disabled if you specify the image parameter.

For more information about specifying the colStatistics parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

cumEigPctTol=double

specifies the significance level of the eigenvalues that determine the rank of the low-rank matrix.

Default	1
Range	(0–1]

decomp="NONE" | "PCA" | "SVD"

specifies the decomposition method for the low-rank matrix. If the value of the maxIter parameter is 0, decomposition is applied to the original input data instead of to the low-rank matrix.

Default	NONE

NONE

performs neither principal component analysis nor singular value decomposition.

PCA

performs principal component analysis.

SVD

performs singular value decomposition.

display={displayTables}

specifies a list of results tables to send to the client for display.

For more information about specifying the display parameter, see the common displayTables parameter (Appendix A: Common Parameters).

fixedMu=True | False

when set to True, fixes mu in each iteration of the accelerated proximal gradient method. Otherwise, mu is dynamically updated in each iteration.

Default	False

freq="variable-name"

specifies a numeric variable that contains the frequency of occurrence of each observation.

icaMaxiter=integer

specifies the maximum number of iterations of Infomax ICA when training.

Default	100
Range	1–500

icaMethod="FOBI" | "GEOD" | "SYMM" | "UNCON"

specifies the ICA method for RPCA-ICA anomaly detection.

Default	FOBI

FOBI

performs the Fourth Order Blind Identification method for ICA.

GEOD

performs the Geodesic Infomax method for ICA.

SYMM

performs the Symmetric Orthogonalized Infomax method for ICA.

UNCON

performs the unconstrained Infomax method for ICA.

id=["variable-name-1" <, "variable-name-2", ...>]

specifies the variables to use as record identifiers.

image="variable-name"

specifies the name of the column that contains image binaries, encoded as JPG, PNG, TIF, or WIDE. You cannot specify this parameter with the inputs parameter.

Alias	imageVar

inputs=[{casinvardesc-1} <, {casinvardesc-2}, ...>]

For more information about specifying the inputs parameter, see the common casinvardesc parameter (Appendix A: Common Parameters).

Aliases	input
	vars
	var

lambda_=double

Range	(0–10000000000]

lambdaWeight=double

specifies the weight of lambda.

Default	1
Range	(0–10000000000]

maxIter=integer

specifies the maximum number of iterations for robust principal component analysis algorithms.

Default	1000
Minimum value	0

method="ALM" | "APG"

specifies the method to use to perform the robust principal component analysis.

Default	ALM

ALM

uses the augmented Lagrange multiplier method.

APG

uses the accelerated proximal gradient method.

mu=double

specifies an initial value of mu in the objective function for the accelerated proximal gradient method.

Default	0.001
Range	0–10000000000

nThreads=integer

specifies the maximum number of threads to use on each computation node.

Default	16
Range	0–1024

numSigVars=integer

Default	1
Minimum value	1

outMat={outRpcaTabs}

specifies a list of parameters for the output tables of the robust principal component analysis method.

The outRpcaTabs value can be one or more of the following:

"errMat":{casouttable}

specifies the name of the output table for the error matrix.

For more information about specifying the errMat parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

Alias	outError

"lowRankMat":{casouttable}

specifies the name of the output table for the low-rank matrix.

For more information about specifying the lowRankMat parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

Alias	outLowRank

"sparseMat":{casouttable}

specifies the name of the output table for the sparse matrix.

For more information about specifying the sparseMat parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

Alias	outSparse

outPca={outPcaTabs}

specifies a list of parameters for the output tables of the principal component analysis.

The outPcaTabs value can be one or more of the following:

"pcLoadings":{casouttable}

specifies the name of the output table for the principal component loadings.

For more information about specifying the pcLoadings parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

"pcScores":{casouttable}

specifies the name of the output table for the principal component scores.

For more information about specifying the pcScores parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

outputTables={outputTables}

lists the names of results tables to save as CAS tables on the server.

For more information about specifying the outputTables parameter, see the common outputTables parameter (Appendix A: Common Parameters).

Alias	displayOut

outSvd={outSvdTabs}

specifies a list of parameters for the output tables of the singular value decomposition. This parameter is disabled if you specify the image parameter.

The outSvdTabs value can be one or more of the following:

"svdDiag":{casouttable}

specifies the name of the output table for the diagonal vector of the rectangular diagonal matrix.

For more information about specifying the svdDiag parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

"svdLeft":{casouttable}

specifies the name of the output table for the left-singular vectors.

For more information about specifying the svdLeft parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

"svdRight":{casouttable}

specifies the name of the output table for the right-singular vectors.

For more information about specifying the svdRight parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

pcPrefix="string"

specifies a prefix for naming the principal components.

Default	"Prin"

saveState={casouttable}

For more information about specifying the saveState parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

scale=True | False

when set to True, scales the numeric variables by the standard deviation of each column.

Alias	scaling
Default	False

sigmaCoef=double

Default	1
Minimum value	1E-10

svdMaxRank=integer

Minimum value	1

svdMethod="EIGEN" | "ITERATIVE" | "RANDOM"

specifies the type of the singular value decomposition solver.

Default	EIGEN

EIGEN

uses the eigenvalue decomposition method.

ITERATIVE

uses the iterative singular value decomposition method.

RANDOM

uses the randomized singular value decomposition method.

svdRand={randomizedSvd}

specifies a list of parameters to use when the value of the svdMethod parameter is RANDOM.

The randomizedSvd value can be one or more of the following:

"power":integer

specifies the parameter power.

Default	0
Minimum value	0

"randSeed":integer

specifies the seed value.

Default	0
Minimum value	1

* table={castable}

specifies the settings for an input table.

Long form	table={"name":"table-name"}
Shortcut form	table="table-name"

The castable value can be one or more of the following:

"caslib":"string"

specifies the caslib for the input table that you want to use with the action. By default, the active caslib is used. Specify a value only if you need to access a table from a different caslib.

"computedOnDemand":True | False

when set to True, creates the computed variables when the table is loaded instead of when the action begins.

Alias	compOnDemand
Default	False

"computedVars":[{casinvardesc-1} <, {casinvardesc-2}, ...>]

Alias	compVars

The casinvardesc value can be one or more of the following:

"format":"string"

specifies the format to apply to the variable.

"formattedLength":integer

specifies the length of the format field plus the length of the format precision.

"label":"string"

specifies the descriptive label for the variable.

* "name":"variable-name"

specifies the name for the variable.

"nfd":integer

specifies the length of the format precision.

"nfl":integer

specifies the length of the format field.

"computedVarsProgram":"string"

specifies an expression for each computed variable that you include in the computedVars parameter.

Alias	compPgm

"dataSourceOptions":{"key-1":{any-list-or-data-type-1} <, "key-2":{any-list-or-data-type-2}, ...>}

specifies data source options.

Aliases	options
Aliases	dataSource

"importOptions":{"fileType":"ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters}

specifies the settings for reading a table from a data source.

Alias	import_

For more information about specifying the importOptions parameter, see the common importOptions parameter (Appendix A: Common Parameters).

* "name":"table-name"

specifies the name of the input table.

"orderBy":[{casinvardesc-1} <, {casinvardesc-2}, ...>]

The casinvardesc value can be one or more of the following:

"format":"string"

specifies the format to apply to the variable.

"formattedLength":integer

specifies the length of the format field plus the length of the format precision.

"label":"string"

specifies the descriptive label for the variable.

* "name":"variable-name"

specifies the name for the variable.

"nfd":integer

specifies the length of the format precision.

"nfl":integer

specifies the length of the format field.

"singlePass":True | False

when set to True, does not create a transient table on the server. Setting this parameter to True can be efficient, but the data might not have stable ordering upon repeated runs.

Default	False

"vars":[{casinvardesc-1} <, {casinvardesc-2}, ...>]

specifies the variables to use in the action.

The casinvardesc value can be one or more of the following:

"format":"string"

specifies the format to apply to the variable.

"formattedLength":integer

specifies the length of the format field plus the length of the format precision.

"label":"string"

specifies the descriptive label for the variable.

* "name":"variable-name"

specifies the name for the variable.

"nfd":integer

specifies the length of the format precision.

"nfl":integer

specifies the length of the format field.

"where":"where-expression"

specifies an expression for subsetting the input data.

"whereTable":{groupbytable}

The groupbytable value can be one or more of the following:

"casLib":"string"

specifies the caslib for the filter table. By default, the active caslib is used.

"dataSourceOptions":{adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters}

specifies data source options.

Aliases	options
Aliases	dataSource

For more information about specifying the dataSourceOptions parameter, see the common dataSourceOptions parameter (Appendix A: Common Parameters).

"importOptions":{"fileType":"ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters}

specifies the settings for reading a table from a data source.

Alias	import_

For more information about specifying the importOptions parameter, see the common importOptions parameter (Appendix A: Common Parameters).

* "name":"table-name"

specifies the name of the filter table.

"vars":[{casinvardesc-1} <, {casinvardesc-2}, ...>]

specifies the variable names to use from the filter table.

The casinvardesc value can be one or more of the following:

"format":"string"

specifies the format to apply to the variable.

"formattedLength":integer

specifies the length of the format field plus the length of the format precision.

"label":"string"

specifies the descriptive label for the variable.

* "name":"variable-name"

specifies the name for the variable.

"nfd":integer

specifies the length of the format precision.

"nfl":integer

specifies the length of the format field.

"where":"where-expression"

specifies an expression for subsetting the data from the filter table.

tolerance=double

specifies the convergence criterion for the robust principal component analysis algorithms.

Alias	stopcriterion
Default	1E-07
Minimum value	1E-10

useMatrix=True | False

Default	False

robustpca Action

Performs robust principal component analysis.

R Syntax
Summary: Input and Output Tables
Parameter Descriptions

R Syntax

results <- cas.robustPca.robustpca(s,

anomalyDetection=TRUE | FALSE,

anomalyDetectionMethod=integer,

attributes=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

center=TRUE | FALSE,

code=list(

casOut=list(

caslib="string"

compress=TRUE | FALSE

indexVars=list("variable-name-1" <, "variable-name-2", ...>)

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where=list("string-1" <, "string-2", ...>)

comment=TRUE | FALSE,

fmtWdth=integer,

indentSize=integer,

labelId=integer,

lineSize=integer,

noTrim=TRUE | FALSE,

projectionType="LRS" | "PCA",

tabForm=TRUE | FALSE

colStatistics=list(

caslib="string",

compress=TRUE | FALSE,

indexVars=list("variable-name-1" <, "variable-name-2", ...>),

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where=list("string-1" <, "string-2", ...>)

cumEigPctTol=double,

display=list(

caseSensitive=TRUE | FALSE,

exclude=TRUE | FALSE,

excludeAll=TRUE | FALSE,

keyIsPath=TRUE | FALSE,

names=list("string-1" <, "string-2", ...>),

pathType="LABEL" | "NAME",

traceNames=TRUE | FALSE

fixedMu=TRUE | FALSE,

freq="variable-name",

icaMaxiter=integer,

id=list("variable-name-1" <, "variable-name-2", ...>),

image="variable-name",

inputs=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

lambda=double,

lambdaWeight=double,

maxIter=integer,

mu=double,

nThreads=integer,

numSigVars=integer,

outMat=list(

errMat=list(

caslib="string"

compress=TRUE | FALSE

indexVars=list("variable-name-1" <, "variable-name-2", ...>)

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where=list("string-1" <, "string-2", ...>)

lowRankMat=list(

caslib="string"

compress=TRUE | FALSE

indexVars=list("variable-name-1" <, "variable-name-2", ...>)

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where=list("string-1" <, "string-2", ...>)

sparseMat=list(

caslib="string"

compress=TRUE | FALSE

indexVars=list("variable-name-1" <, "variable-name-2", ...>)

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where=list("string-1" <, "string-2", ...>)

)

outPca=list(

pcLoadings=list(

caslib="string"

compress=TRUE | FALSE

indexVars=list("variable-name-1" <, "variable-name-2", ...>)

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where=list("string-1" <, "string-2", ...>)

pcScores=list(

caslib="string"

compress=TRUE | FALSE

indexVars=list("variable-name-1" <, "variable-name-2", ...>)

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where=list("string-1" <, "string-2", ...>)

)

outputTables=list(

groupByVarsRaw=TRUE | FALSE,

includeAll=TRUE | FALSE,

names=list("string-1" <, "string-2", ...>) | list(key-1=list(casouttable-1) <, key-2=list(casouttable-2), ...>),

repeated=TRUE | FALSE,

replace=TRUE | FALSE

outSvd=list(

svdDiag=list(

caslib="string"

compress=TRUE | FALSE

indexVars=list("variable-name-1" <, "variable-name-2", ...>)

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where=list("string-1" <, "string-2", ...>)

svdLeft=list(

caslib="string"

compress=TRUE | FALSE

indexVars=list("variable-name-1" <, "variable-name-2", ...>)

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where=list("string-1" <, "string-2", ...>)

svdRight=list(

caslib="string"

compress=TRUE | FALSE

indexVars=list("variable-name-1" <, "variable-name-2", ...>)

label="string"

lifetime=64-bit-integer

maxMemSize=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

replication=integer

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

threadBlockSize=64-bit-integer

timeStamp="string"

where=list("string-1" <, "string-2", ...>)

)

pcPrefix="string",

saveState=list(

caslib="string",

compress=TRUE | FALSE,

indexVars=list("variable-name-1" <, "variable-name-2", ...>),

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where=list("string-1" <, "string-2", ...>)

scale=TRUE | FALSE,

sigmaCoef=double,

svdMaxRank=integer,