Text Mining Action Set

Provides actions for mining textual data

tmMine Action

Combines the tpParse action, the tpAccumulate action, and SVD functionality into one action. Some parameters require a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.

CASL Syntax
Summary: Input and Output Tables
Parameter Descriptions

CASL Syntax

textMining.tmMine <result=results> <status=rc> /

cellWeight="LOG" | "NONE",

child={

caslib="string",

compress=TRUE | FALSE,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

complexTag=TRUE | FALSE,

copyVars={"variable-name-1" <, "variable-name-2", ...>},

defaultEntitiesPriority=integer,

docId="variable-name",

docPro={

caslib="string",

compress=TRUE | FALSE,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

docStdMultiple=double,

documents={

caslib="string",

computedOnDemand=TRUE | FALSE,

computedVars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

computedVarsProgram="string",

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},

groupBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

groupByMode="NOSORT" | "REDISTRIBUTE",

importOptions={fileType="ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters},

name="table-name",

orderBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

singlePass=TRUE | FALSE,

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

where="where-expression",

whereTable={

casLib="string"

dataSourceOptions={adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters}

name="table-name"

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}}

where="where-expression"

}

},

entities="NONE" | "STD",

exactDocPro=TRUE | FALSE,

exactWeight=TRUE | FALSE,

includeEmptyDocument=TRUE | FALSE,

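k=integer,

language="ARABIC" | "CHINESE" | "CROATIAN" | "CZECH" | "DANISH" | "DUTCH" | "ENGLISH" | "FARSI" | "FINNISH" | "FRENCH" | "GERMAN" | "GREEK" | "HEBREW" | "HINDI" | "HUNGARIAN" | "INDONESIAN" | "ITALIAN" | "JAPANESE" | "KAZAKH" | "KOREAN" | "NORWEGIAN" | "POLISH" | "PORTUGUESE" | "ROMANIAN" | "RUSSIAN" | "SLOVAK" | "SLOVENE" | "SPANISH" | "SWEDISH" | "TAGALOG" | "THAI" | "TURKISH" | "VIETNAMESE",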

legacyNames=TRUE | FALSE,

liti={

caslib="string",

computedOnDemand=TRUE | FALSE,

computedVars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

computedVarsProgram="string",

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},

groupBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

groupByMode="NOSORT" | "REDISTRIBUTE",

name="table-name",

orderBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

singlePass=TRUE | FALSE,

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

where="where-expression",

whereTable={

casLib="string"

name="table-name"

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}}

where="where-expression"

}

},

maxK=integer,

multiterm={

caslib="string",

computedOnDemand=TRUE | FALSE,

computedVars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

computedVarsProgram="string",

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},

groupBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

groupByMode="NOSORT" | "REDISTRIBUTE",

name="table-name",

orderBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

singlePass=TRUE | FALSE,

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

where="where-expression",

whereTable={

casLib="string"

name="table-name"

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}}

where="where-expression"

}

},

norm="ALL" | "DOC" | "NONE" | "WORD",

nounGroups=TRUE | FALSE,

nThreads=integer,

numLabels=integer,

offset={

caslib="string",

compress=TRUE | FALSE,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

parent={

caslib="string",

compress=TRUE | FALSE,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

parseConfig={

caslib="string",

compress=TRUE | FALSE,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

reduce=integer,

resolution="HIGH" | "LOW" | "MED",

rotate="PROMAX" | "VARIMAX",

rowPivot=double,

s={

caslib="string",

compress=TRUE | FALSE,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

saveState={

caslib="string",

label="string",

lifetime=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

},

selectAttribute={

opType="IGNORE" | "KEEP",

tagList={"string-1" <, "string-2", ...>}

},

selectEntity={

opType="IGNORE" | "KEEP",

tagList={"string-1" <, "string-2", ...>}

},

selectPos={

opType="IGNORE" | "KEEP",

tagList={"string-1" <, "string-2", ...>}

},

showDroppedTerms=TRUE | FALSE,

startList={

caslib="string",

computedOnDemand=TRUE | FALSE,

computedVars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

computedVarsProgram="string",

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},

groupBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

groupByMode="NOSORT" | "REDISTRIBUTE",

name="table-name",

orderBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

singlePass=TRUE | FALSE,

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

where="where-expression",

whereTable={

casLib="string"

name="table-name"

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}}

where="where-expression"

}

},

stemming=TRUE | FALSE,

stopList={

caslib="string",

computedOnDemand=TRUE | FALSE,

computedVars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

computedVarsProgram="string",

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},

groupBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

groupByMode="NOSORT" | "REDISTRIBUTE",

name="table-name",

orderBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

singlePass=TRUE | FALSE,

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

where="where-expression",

whereTable={

casLib="string"

name="table-name"

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}}

where="where-expression"

}

},

synonyms={

caslib="string",

computedOnDemand=TRUE | FALSE,

computedVars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

computedVarsProgram="string",

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},

groupBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

groupByMode="NOSORT" | "REDISTRIBUTE",

name="table-name",

orderBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

singlePass=TRUE | FALSE,

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

where="where-expression",

whereTable={

casLib="string"

name="table-name"

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}}

where="where-expression"

}

},

tagging=TRUE | FALSE,

target="variable-name",

terms={

caslib="string",

compress=TRUE | FALSE,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

termStdMultiple=double,

termTopics={

caslib="string",

compress=TRUE | FALSE,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

termWeight="ENTROPY" | "MI" | "NONE",

text="variable-name",

tolerance=double,

topicDecision=TRUE | FALSE,

topics={

caslib="string",

compress=TRUE | FALSE,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

u={

caslib="string",

compress=TRUE | FALSE,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

v={

caslib="string",

compress=TRUE | FALSE,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

wordPro={

caslib="string",

compress=TRUE | FALSE,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

}

;

* indicates a required parameter

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables
Parameter	Subparameter	Description
* documents	—	names the input CAS table of documents to be parsed. You must include a text variable specified with textVar and a document ID variable specified with docIdVar.
liti	—	specifies the input CAS table that contains the LITI binary, which contains the predefined or custom concept definitions. The tmMine action can reference a concepts model that is compiled in the compileConcept action. For more information on how to do this, see the example, Referencing a Concepts Model in the tmMine Action. This parameter requires a SAS Visual Text Analytics license.
multiterm	—	specifies the name of the CAS table that contains a list of multi-word terms and their part-of-speech types. Each multi-word term is parsed as a single token.
startList	—	specifies the input CAS table that contains the terms that are to be kept for the analysis. If specified, the table must have the Term (varchar) variable. A Role (varchar) variable is optional.
stopList	—	specifies the input CAS table that contains the terms to exclude from the analysis. If specified, the table must have the Term (varchar) variable. A Role (varchar) variable is optional.
synonyms	—	specifies the input CAS table that contains user-defined synonyms to be used in the analysis. If specified, the table must have the following variables (all varchar): Term, Parent. Termrole and parentrole variables are optional.

Parameters for Creating Output Tables
Parameter	Subparameter	Description
child	—	specifies the name of the output CAS table to contain a compressed representation of the sparse term-by-document matrix with raw counts.
docPro	—	specifies the name of the table to contain the SVD projections of the documents.
offset	—	specifies the name of the output CAS table to contain the position information about the occurrences of child terms in the document collection. The maximum output length of a tokenized term in this table is 256 bytes, so tokens that consist of an extremely long sequence of letters, numbers, and symbols are truncated to at most that length.
parent	—	specifies the name of the output CAS table to contain a compressed representation of the sparse term-by-document matrix.
parseConfig	—	specifies the name of the config CAS table to contain parsing configuration information.
s	—	specifies the S matrix, which is a diagonal matrix that is output in compressed form, with two variables and k rows. The variable _ID_ indicates the row and column of the entry and the variable S contains the singular values.
saveState	—	specifies the name of the table for saving the analytic score model. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.
termTopics	—	specifies the name of the output CAS table to contain the term-by-topic sparse matrix information.
terms	—	specifies the output CAS table to contain the summary information about the terms in the document collection. The maximum output length of a tokenized term is 256 bytes, so tokens that consist of an extremely long sequence of letters, numbers, and symbols are truncated to at most that length. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.
topics	—	specifies the output CAS table to contain the topics that are discovered.
u	—	specifies the U matrix, which contains the left singular vectors. The matrix U is number of terms by k+1.
v	—	specifies the transpose of the matrix containing the right singular vectors. The matrix V is number of documents by k+1.
wordPro	—	specifies the table to contain the projections of the terms. If k dimensions of the SVD are found and the input data set contains n terms, this table will have n rows and k+1 columns.

Parameter Descriptions

cellWeight="LOG" | "NONE"

specifies how the elements in the term-by-document matrix (the parent output table) are weighted.

Alias	cellWgt
Default	LOG

child={casouttable}

specifies the name of the output CAS table to contain a compressed representation of the sparse term-by-document matrix with raw counts.

For more information about specifying the child parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

complexTag=TRUE | FALSE

specifies whether the part-of-speech tags that are used for tokenization and accumulation are detailed (complex, such as A.nom.f.p) or general (simple, such as A). This parameter requires a SAS Visual Text Analytics license.

Default	FALSE

copyVars={"variable-name-1" <, "variable-name-2", ...>}

specifies a list of variables from the documents table that are to be retained on the output docPro table. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.

Aliases	keepVars, keepVar

defaultEntitiesPriority=integer

specifies the priority of the default LITI file that contains predefined concepts when both predefined and custom concepts are used. The default setting is 1, which means that the predefined concepts have the lowest priority compared to the custom concepts. However, certain predefined concepts within the LITI file may still have a higher priority. For more information, see the SAS Visual Text Analytics User's Guide. This parameter requires a SAS Visual Text Analytics license.

Default	1
Range	0–32

* docId="variable-name"

specifies the character or numeric variable on the documents table that contains the ID of each document.

Default	"DOC_ID"

docPro={casouttable}

specifies the name of the table to contain the SVD projections of the documents.

For more information about specifying the docPro parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

docStdMultiple=double

specifies how many standard deviations above the mean to set the document cutoff. This parameter requires a SAS Visual Text Analytics license.

Default	1
Range	0–10

* documents={castable}

names the input CAS table of documents to be parsed. You must include a text variable specified with textVar and a document ID variable specified with docIdVar.

For more information about specifying the documents parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

entities="NONE" | "STD"

specifies whether to extract entities in parsing. If set to NONE, no entities are output. If set to STD, the standard entities are output.

Default	NONE

exactDocPro=TRUE | FALSE

specifies if the exact document projection values should be output. This parameter requires a SAS Visual Text Analytics license.

Default	TRUE

exactWeight=TRUE | FALSE

specifies whether the exact entries in the u table are used in the topic computation. Otherwise, the values are rounded to three decimal places.

Alias	exactWeights
Default	FALSE

includeEmptyDocument=TRUE | FALSE

specifies whether empty-document indicators are included in the parent table. This parameter requires a SAS Visual Text Analytics license.

Default	FALSE

k=integer

specifies the number of dimensions to be extracted (also the number of derived topics). If the input data is too small for the requested number of dimensions, this value is adjusted to complete the calculation.

Alias	numTopics
Range	1–1000

language="ARABIC" | "CHINESE" | "CROATIAN" | "CZECH" | "DANISH" | "DUTCH" | "ENGLISH" | "FARSI" | "FINNISH" | "FRENCH" | "GERMAN" | "GREEK" | "HEBREW" | "HINDI" | "HUNGARIAN" | "INDONESIAN" | "ITALIAN" | "JAPANESE" | "KAZAKH" | "KOREAN" | "NORWEGIAN" | "POLISH" | "PORTUGUESE" | "ROMANIAN" | "RUSSIAN" | "SLOVAK" | "SLOVENE" | "SPANISH" | "SWEDISH" | "TAGALOG" | "THAI" | "TURKISH" | "VIETNAMESE"

specifies the language used in the text variable of the input document table.

Default	ENGLISH

legacyNames=TRUE | FALSE

specifies whether to use the legacy variable names on tables. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.

Default	FALSE

liti={castable}

specifies the input CAS table that contains the LITI binary, which contains the predefined or custom concept definitions. The tmMine action can reference a concepts model that is compiled in the compileConcept action. For more information on how to do this, see the example, Referencing a Concepts Model in the tmMine Action. This parameter requires a SAS Visual Text Analytics license.

For more information about specifying the liti parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

maxK=integer

specifies the maximum number of dimensions to be extracted. The maxK option can be used in conjunction with the resolution option to dynamically select the recommended number of dimensions. If you want to use a specific number of dimensions, either use maxK and set the resolution to HIGH, or use the k parameter.

Default	10
Range	1–1000

multiterm={castable}

specifies the name of the CAS table that contains a list of multi-word terms and their part-of-speech types. Each multi-word term is parsed as a single token.

For more information about specifying the multiterm parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

norm="ALL" | "DOC" | "NONE" | "WORD"

specifies whether to normalize the document projections, term projections, or both. The normalization converts the representation from depending on angles between vectors to depending on Euclidean distances between vectors.

Default	ALL

nounGroups=TRUE | FALSE

when set to True, extracts noun groups during parsing and adds the noun groups as additional rows in the offset table. This is also reflected in the terms and parent tables. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.

Default	TRUE

nThreads=integer

specifies the number of threads to be used per node. The value must be an integer. When the value is 0, the number of threads equals the number of CPUs.

Default	8
Minimum value	0

numLabels=integer

specifies the number of terms to use in the descriptive label for each topic.

Default	5
Range	1–500

offset={casouttable}

specifies the name of the output CAS table to contain the position information about the occurrences of child terms in the document collection. The maximum output length of a tokenized term in this table is 256 bytes, so tokens that consist of an extremely long sequence of letters, numbers, and symbols are truncated to at most that length.

For more information about specifying the offset parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

parent={casouttable}

specifies the name of the output CAS table to contain a compressed representation of the sparse term-by-document matrix.

For more information about specifying the parent parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

parseConfig={casouttable}

specifies the name of the config CAS table to contain parsing configuration information.

For more information about specifying the parseConfig parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

Alias	scoreConfig

reduce=integer

specifies the minimum number of documents a term should be in to be kept. The value must be an integer.

Default	10
Range	1–32767

resolution="HIGH" | "LOW" | "MED"

specifies the desired resolution level for the recommended number of dimensions to be extracted by the SVD.

Default	HIGH

rotate="PROMAX" | "VARIMAX"

specifies the type of rotation used to maximize the explanatory power of each topic. A VARIMAX rotation produces uncorrelated topics and a PROMAX rotation produces correlated topics.

Default	VARIMAX

rowPivot=double

specifies the row-pivot weight for document normalization of the parent table before the SVD. A negative value turns off the row-pivot process. When topics are requested, a value of 1 is used for this parameter by default. This parameter requires a SAS Visual Text Analytics license.

Default	-1
Range	-1–1

s={casouttable}

specifies the S matrix, which is a diagonal matrix that is output in compressed form, with two variables and k rows. The variable _ID_ indicates the row and column of the entry and the variable S contains the singular values.

For more information about specifying the s parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

saveState={casouttable}

specifies the name of the table for saving the analytic score model. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.

Long form	saveState={name="table-name"}
Shortcut form	saveState="table-name"

The casouttable value can be one or more of the following:

caslib="string"

specifies the name of the caslib for the output table.

label="string"

specifies the descriptive label to associate with the table.

lifetime=64-bit-integer

specifies the number of seconds to keep the table in memory after it is last accessed. The table is dropped if it is not accessed for the specified number of seconds.

Default	0
Minimum value	0

memoryFormat="DVR" | "INHERIT" | "STANDARD"

specifies the memory format for the output table.

Default	INHERIT

DVR

use the duplicate value reduction memory format. This memory format can reduce the memory consumption and file size when the input data contains duplicate values.

INHERIT

use the default memory format that is set for the server. By default, the server uses the standard memory format. If an administrator sets the CAS_DEFAULT_MEMORY_FORMAT environment variable to DVR, then the DVR memory format is set as the default for the server.

STANDARD

use the standard memory format.

name="table-name"

specifies the name for the output table.

promote=TRUE | FALSE

when set to True, adds the output table with a global scope. This enables other sessions to access the table, subject to access controls. The target caslib must also have a global scope.

Default	FALSE

replace=TRUE | FALSE

when set to True, overwrites an existing table that has the same name.

Default	FALSE

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

Specifies the Table Redistribution Policy when the number of worker pods increases on a running CAS server.

DEFER

Defer redistribution policy selection to higher-level entity.

NOREDIST

Do not redistribute table data when the number of worker pods changes on a running CAS server.

REBALANCE

Rebalance table data when the number of worker pods changes on a running CAS server.

selectAttribute={seltag}

specifies a list of attribute types to be kept or ignored.

The seltag value can be one or more of the following:

opType="IGNORE" | "KEEP"

specifies what to do with terms with selected tags. KEEP: terms without selected tags will be ignored. IGNORE: terms with selected tags will be ignored.

Default	KEEP

tagList={"string-1" <, "string-2", ...>}

specifies a list of tags. Unsupported tags trigger a warning message.

selectEntity={seltag}

specifies a list of entity types to be kept or ignored. If this parameter is specified, entities must be set to STD.

The seltag value can be one or more of the following:

opType="IGNORE" | "KEEP"

specifies what to do with terms with selected tags. KEEP: terms without selected tags will be ignored. IGNORE: terms with selected tags will be ignored.

Default	KEEP

tagList={"string-1" <, "string-2", ...>}

specifies a list of tags. Unsupported tags trigger a warning message.

selectPos={seltag}

specifies a list of part-of-speech tags to be kept or ignored.

The seltag value can be one or more of the following:

opType="IGNORE" | "KEEP"

specifies what to do with terms with selected tags. KEEP: terms without selected tags will be ignored. IGNORE: terms with selected tags will be ignored.

Default	KEEP

tagList={"string-1" <, "string-2", ...>}

specifies a list of tags. Unsupported tags trigger a warning message.

showDroppedTerms=TRUE | FALSE

specifies whether to include terms that have a keep status of N in the TERMS output table.

Default	FALSE

startList={castable}

specifies the input CAS table that contains the terms that are to be kept for the analysis. If specified, the table must have the Term (varchar) variable. A Role (varchar) variable is optional.

For more information about specifying the startList parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

stemming=TRUE | FALSE

specifies whether stemming is to occur in parsing. When set to True, terms are evaluated to see if they belong to a common parent form and the information is added to the offset table.

Default	TRUE

stopList={castable}

specifies the input CAS table that contains the terms to exclude from the analysis. If specified, the table must have the Term (varchar) variable. A Role (varchar) variable is optional.

For more information about specifying the stopList parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

synonyms={castable}

specifies the input CAS table that contains user-defined synonyms to be used in the analysis. If specified, the table must have the following variables (all varchar): Term, Parent. Termrole and parentrole variables are optional.

For more information about specifying the synonyms parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

tagging=TRUE | FALSE

specifies whether part-of-speech tagging is used in parsing.

Default	TRUE

target="variable-name"

specifies the numeric or character variable that contains a category level on the documents table. This parameter is optional unless you plan to use Mutual Information as the term weight in accumulation.

terms={casouttable}

specifies the output CAS table to contain the summary information about the terms in the document collection. The maximum output length of a tokenized term is 256 bytes. So tokens consisting of an extremely long sequence of letters, numbers and symbols will be truncated to less than or equal to that maximum value. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.

For more information about specifying the terms parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

termStdMultiple=double

specifies how many standard deviations above the mean to set the term cutoff. This parameter requires a SAS Visual Text Analytics license.

Default	1
Range	0–10

termTopics={casouttable}

specifies the name of the output CAS table to contain the term-by-topic sparse matrix information.

For more information about specifying the termTopics parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

termWeight="ENTROPY" | "MI" | "NONE"

specifies how terms are weighted. Valid values are Entropy, None and MI (Mutual Information). MI requires a target variable in the offset table, which is generated by the tpParse action.

Alias	termWgt
Default	ENTROPY

* text="variable-name"

specifies the character variable in the documents table that contains the text to be processed.

Default	"text"

tolerance=double

specifies the stopping threshold for the iterative factorization algorithm. If 0 is specified the default value is used.

Default	1E-05
Range	0–1

topicDecision=TRUE | FALSE

specifies whether to include topic membership decisions and document cutoffs in the output tables. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.

Default	FALSE

topics={casouttable}

specifies the output CAS table to contain the topics that are discovered.

For more information about specifying the topics parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

u={casouttable}

specifies the U matrix, which contains the left singular vectors. The matrix U is number of terms by k+1.

For more information about specifying the u parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

v={casouttable}

specifies the transpose of the matrix containing the right singular vectors. The matrix V is number of documents by k+1.

For more information about specifying the v parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

wordPro={casouttable}

specifies the table to contain the projections of the terms. If k dimensions of the SVD are found and the input data set contains n terms, this table will have n rows and k+1 columns.

For more information about specifying the wordPro parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

tmMine Action

Lua Syntax
Summary: Input and Output Tables
Parameter Descriptions

Lua Syntax

results, info = s:textMining_tmMine{

cellWeight="LOG" | "NONE",

child={

caslib="string",

compress=true | false,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=true | false,

replace=true | false,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

complexTag=true | false,

copyVars={"variable-name-1" <, "variable-name-2", ...>},

defaultEntitiesPriority=integer,

docId="variable-name",

docPro={

caslib="string",

compress=true | false,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=true | false,

replace=true | false,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

docStdMultiple=double,

documents={

caslib="string",

computedOnDemand=true | false,

computedVars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

computedVarsProgram="string",

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},

groupBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

groupByMode="NOSORT" | "REDISTRIBUTE",

name="table-name",

orderBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

singlePass=true | false,

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

where="where-expression",

whereTable={

casLib="string"

name="table-name"

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}}

where="where-expression"

}

},

entities="NONE" | "STD",

exactDocPro=true | false,

exactWeight=true | false,

includeEmptyDocument=true | false,

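k=integer,

language="ARABIC" | "CHINESE" | "CROATIAN" | "CZECH" | "DANISH" | "DUTCH" | "ENGLISH" | "FARSI" | "FINNISH" | "FRENCH" | "GERMAN" | "GREEK" | "HEBREW" | "HINDI" | "HUNGARIAN" | "INDONESIAN" | "ITALIAN" | "JAPANESE" | "KAZAKH" | "KOREAN" | "NORWEGIAN" | "POLISH" | "PORTUGUESE" | "ROMANIAN" | "RUSSIAN" | "SLOVAK" | "SLOVENE" | "SPANISH" | "SWEDISH" | "TAGALOG" | "THAI" | "TURKISH" | "VIETNAMESE",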

legacyNames=true | false,

liti={

caslib="string",

computedOnDemand=true | false,

computedVars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

computedVarsProgram="string",

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},

groupBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

groupByMode="NOSORT" | "REDISTRIBUTE",

name="table-name",

orderBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

singlePass=true | false,

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

where="where-expression",

whereTable={

casLib="string"

name="table-name"

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}}

where="where-expression"

}

},

maxK=integer,

multiterm={

caslib="string",

computedOnDemand=true | false,

computedVars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

computedVarsProgram="string",

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},

groupBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

groupByMode="NOSORT" | "REDISTRIBUTE",

name="table-name",

orderBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

singlePass=true | false,

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

where="where-expression",

whereTable={

casLib="string"

name="table-name"

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}}

where="where-expression"

}

},

norm="ALL" | "DOC" | "NONE" | "WORD",

nounGroups=true | false,

nThreads=integer,

numLabels=integer,

offset={

caslib="string",

compress=true | false,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=true | false,

replace=true | false,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

parent={

caslib="string",

compress=true | false,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=true | false,

replace=true | false,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

parseConfig={

caslib="string",

compress=true | false,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=true | false,

replace=true | false,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

reduce=integer,

resolution="HIGH" | "LOW" | "MED",

rotate="PROMAX" | "VARIMAX",

rowPivot=double,

s={

caslib="string",

compress=true | false,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=true | false,

replace=true | false,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

saveState={

caslib="string",

label="string",

lifetime=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=true | false,

replace=true | false,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

},

selectAttribute={

opType="IGNORE" | "KEEP",

tagList={"string-1" <, "string-2", ...>}

},

selectEntity={

opType="IGNORE" | "KEEP",

tagList={"string-1" <, "string-2", ...>}

},

selectPos={

opType="IGNORE" | "KEEP",

tagList={"string-1" <, "string-2", ...>}

},

showDroppedTerms=true | false,

startList={

caslib="string",

computedOnDemand=true | false,

computedVars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

computedVarsProgram="string",

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},

groupBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

groupByMode="NOSORT" | "REDISTRIBUTE",

name="table-name",

orderBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

singlePass=true | false,

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

where="where-expression",

whereTable={

casLib="string"

name="table-name"

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}}

where="where-expression"

}

},

stemming=true | false,

stopList={

caslib="string",

computedOnDemand=true | false,

computedVars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

computedVarsProgram="string",

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},

groupBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

groupByMode="NOSORT" | "REDISTRIBUTE",

name="table-name",

orderBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

singlePass=true | false,

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

where="where-expression",

whereTable={

casLib="string"

name="table-name"

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}}

where="where-expression"

}

},

synonyms={

caslib="string",

computedOnDemand=true | false,

computedVars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

computedVarsProgram="string",

dataSourceOptions={key-1=any-list-or-data-type-1 <, key-2=any-list-or-data-type-2, ...>},

groupBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

groupByMode="NOSORT" | "REDISTRIBUTE",

name="table-name",

orderBy={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

singlePass=true | false,

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}},

where="where-expression",

whereTable={

casLib="string"

name="table-name"

vars={{

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

}, {...}}

where="where-expression"

}

},

tagging=true | false,

target="variable-name",

terms={

caslib="string",

compress=true | false,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=true | false,

replace=true | false,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

termStdMultiple=double,

termTopics={

caslib="string",

compress=true | false,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=true | false,

replace=true | false,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

termWeight="ENTROPY" | "MI" | "NONE",

text="variable-name",

tolerance=double,

topicDecision=true | false,

topics={

caslib="string",

compress=true | false,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=true | false,

replace=true | false,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

u={

caslib="string",

compress=true | false,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=true | false,

replace=true | false,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

v={

caslib="string",

compress=true | false,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=true | false,

replace=true | false,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

},

wordPro={

caslib="string",

compress=true | false,

indexVars={"variable-name-1" <, "variable-name-2", ...>},

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=true | false,

replace=true | false,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where={"string-1" <, "string-2", ...>}

}

}

* indicates a required parameter

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables
Parameter	Subparameter	Description
* documents	—	names the input CAS table of documents to be parsed. You must include a text variable specified with the text parameter and a document ID variable specified with the docId parameter.
liti	—	specifies the input CAS table that contains the LITI binary, which contains the predefined or custom concept definitions. The tmMine action can reference a concepts model that is compiled in the compileConcept action. For more information on how to do this, see the example, Referencing a Concepts Model in the tmMine Action. This parameter requires a SAS Visual Text Analytics license.
multiterm	—	specifies the name of the CAS table that contains a list of multi-word terms and their part-of-speech types. Each multi-word term is parsed as a single token.
startList	—	specifies the input CAS table that contains the terms that are to be kept for the analysis. If specified, the table must have the Term (varchar) variable. A Role (varchar) variable is optional.
stopList	—	specifies the input CAS table that contains the terms to exclude from the analysis. If specified, the table must have the Term (varchar) variable. A Role (varchar) variable is optional.
synonyms	—	specifies the input CAS table that contains user-defined synonyms to be used in the analysis. If specified, the table must have the following variables (all varchar): Term, Parent. Termrole and parentrole variables are optional.

Parameters for Creating Output Tables
Parameter	Subparameter	Description
child	—	specifies the name of the output CAS table to contain a compressed representation of the sparse term-by-document matrix with raw counts.
docPro	—	specifies the name of the table to contain the SVD projections of the documents.
offset	—	specifies the name of the output CAS table to contain the position information about the occurrences of child terms in the document collection. The maximum output length of a tokenized term in this table is 256 bytes. So tokens consisting of an extremely long sequence of letters, numbers and symbols will be truncated to less than or equal to that maximum value.
parent	—	specifies the name of the output CAS table to contain a compressed representation of the sparse term-by-document matrix.
parseConfig	—	specifies the name of the config CAS table to contain parsing configuration information.
s	—	specifies the S matrix, which is a diagonal matrix that is output in compressed form, with two variables and k rows. The variable _ID_ indicates the row and column of the entry and the variable S contains the singular values.
saveState	—	specifies the name of the table for saving the analytic score model. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.
termTopics	—	specifies the name of the output CAS table to contain the term-by-topic sparse matrix information.
terms	—	specifies the output CAS table to contain the summary information about the terms in the document collection. The maximum output length of a tokenized term is 256 bytes. So tokens consisting of an extremely long sequence of letters, numbers and symbols will be truncated to less than or equal to that maximum value. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.
topics	—	specifies the output CAS table to contain the topics that are discovered.
u	—	specifies the U matrix, which contains the left singular vectors. The matrix U is number of terms by k+1.
v	—	specifies the transpose of the matrix containing the right singular vectors. The matrix V is number of documents by k+1.
wordPro	—	specifies the table to contain the projections of the terms. If k dimensions of the SVD are found and the input data set contains n terms, this table will have n rows and k+1 columns.

Parameter Descriptions

cellWeight="LOG" | "NONE"

specifies how the elements in the term-by-document matrix (the parent output table) are weighted.

Alias	cellWgt
Default	LOG

child={casouttable}

specifies the name of the output CAS table to contain a compressed representation of the sparse term-by-document matrix with raw counts.

For more information about specifying the child parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

complexTag=true | false

Default	false

copyVars={"variable-name-1" <, "variable-name-2", ...>}

Aliases	keepVars, keepVar

defaultEntitiesPriority=integer

Default	1
Range	0–32

* docId="variable-name"

specifies the character or numeric variable on the documents table that contains the ID of each document.

Default	"DOC_ID"

docPro={casouttable}

specifies the name of the table to contain the SVD projections of the documents.

For more information about specifying the docPro parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

docStdMultiple=double

specifies how many standard deviations above the mean to set the document cutoff. This parameter requires a SAS Visual Text Analytics license.

Default	1
Range	0–10

* documents={castable}

names the input CAS table of documents to be parsed. You must include a text variable specified with the text parameter and a document ID variable specified with the docId parameter.

For more information about specifying the documents parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

entities="NONE" | "STD"

specifies whether to extract entities in parsing. If set to None, no entities are output. If set to STD, the standard entities are output.

Default	NONE

exactDocPro=true | false

specifies if the exact document projection values should be output. This parameter requires a SAS Visual Text Analytics license.

Default	true

exactWeight=true | false

specifies if the exact entries on the u table are to be used in the topic computation, otherwise the values are rounded to three decimal places.

Alias	exactWeights
Default	false

includeEmptyDocument=true | false

Indicates if empty document indicators are included on parent table. This parameter requires a SAS Visual Text Analytics license.

Default	false

k=integer

Alias	numTopics
Range	1–1000

language="ARABIC" | "CHINESE" | "CROATIAN" | "CZECH" | "DANISH" | "DUTCH" | "ENGLISH" | "FARSI" | "FINNISH" | "FRENCH" | "GERMAN" | "GREEK" | "HEBREW" | "HINDI" | "HUNGARIAN" | "INDONESIAN" | "ITALIAN" | "JAPANESE" | "KAZAKH" | "KOREAN" | "NORWEGIAN" | "POLISH" | "PORTUGUESE" | "ROMANIAN" | "RUSSIAN" | "SLOVAK" | "SLOVENE" | "SPANISH" | "SWEDISH" | "TAGALOG" | "THAI" | "TURKISH" | "VIETNAMESE"

specifies the language used in the text variable of the input document table.

Default	ENGLISH

legacyNames=true | false

specifies whether to use the legacy variable names on tables. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.

Default	false

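liti={castable}

specifies the input CAS table that contains the LITI binary, which contains the predefined or custom concept definitions. The tmMine action can reference a concepts model that is compiled in the compileConcept action. For more information on how to do this, see the example, Referencing a Concepts Model in the tmMine Action. This parameter requires a SAS Visual Text Analytics license.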

For more information about specifying the liti parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

maxK=integer

Default	10
Range	1–1000

multiterm={castable}

specifies the name of the CAS table that contains a list of multi-word terms and their part-of-speech types. Each multi-word term is parsed as a single token.

For more information about specifying the multiterm parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

norm="ALL" | "DOC" | "NONE" | "WORD"

Default	ALL

nounGroups=true | false

Default	true

nThreads=integer

specifies the number of threads to be used per node. The value must be an integer. When the value is 0, the number of threads equals the number of CPUs.

Default	8
Minimum value	0

numLabels=integer

specifies the number of terms to use in the descriptive label for each topic.

Default	5
Range	1–500

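offset={casouttable}

specifies the name of the output CAS table to contain the position information about the occurrences of child terms in the document collection. The maximum output length of a tokenized term in this table is 256 bytes. So tokens consisting of an extremely long sequence of letters, numbers and symbols will be truncated to less than or equal to that maximum value.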

For more information about specifying the offset parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

parent={casouttable}

specifies the name of the output CAS table to contain a compressed representation of the sparse term-by-document matrix.

For more information about specifying the parent parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

parseConfig={casouttable}

specifies the name of the config CAS table to contain parsing configuration information.

For more information about specifying the parseConfig parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

Alias	scoreConfig

reduce=integer

specifies the minimum number of documents a term should be in to be kept. The value must be an integer.

Default	10
Range	1–32767

resolution="HIGH" | "LOW" | "MED"

specifies the desired resolution level for the recommended number of dimensions to be extracted by the SVD.

Default	HIGH

rotate="PROMAX" | "VARIMAX"

specifies the type of rotation used to maximize the explanatory power of each topic. A VARIMAX rotation produces uncorrelated topics and a PROMAX rotation produces correlated topics.

Default	VARIMAX

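rowPivot=double

specifies the row-pivot weight for document normalization of the parent table before the SVD. A negative value turns off the row-pivot process. When topics are requested, a value of 1 is used for this parameter by default. This parameter requires a SAS Visual Text Analytics license.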

Default	-1
Range	-1–1

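s={casouttable}

specifies the S matrix, which is a diagonal matrix that is output in compressed form, with two variables and k rows. The variable _ID_ indicates the row and column of the entry and the variable S contains the singular values.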

For more information about specifying the s parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

saveState={casouttable}

specifies the name of the table for saving the analytic score model. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.

Long form	saveState={name="table-name"}
Shortcut form	saveState="table-name"

The casouttable value can be one or more of the following:

caslib="string"

specifies the name of the caslib for the output table.

label="string"

specifies the descriptive label to associate with the table.

lifetime=64-bit-integer

specifies the number of seconds to keep the table in memory after it is last accessed. The table is dropped if it is not accessed for the specified number of seconds.

Default	0
Minimum value	0

memoryFormat="DVR" | "INHERIT" | "STANDARD"

specifies the memory format for the output table.

Default	INHERIT

DVR

use the duplicate value reduction memory format. This memory format can reduce the memory consumption and file size when the input data contains duplicate values.

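INHERIT

use the default memory format that is set for the server. By default, the server uses the standard memory format. If an administrator sets the CAS_DEFAULT_MEMORY_FORMAT environment variable to DVR, then the DVR memory format is set as the default for the server.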

STANDARD

use the standard memory format.

name="table-name"

specifies the name for the output table.

promote=true | false

when set to True, adds the output table with a global scope. This enables other sessions to access the table, subject to access controls. The target caslib must also have a global scope.

Default	false

replace=true | false

when set to True, overwrites an existing table that has the same name.

Default	false

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

Specifies the Table Redistribution Policy when the number of worker pods increases on a running CAS server.

DEFER

Defer redistribution policy selection to higher-level entity.

NOREDIST

Do not redistribute table data when the number of worker pods changes on a running CAS server.

REBALANCE

Rebalance table data when the number of worker pods changes on a running CAS server.

selectAttribute={seltag}

specifies a list of attribute types to be kept or ignored.

The seltag value can be one or more of the following:

opType="IGNORE" | "KEEP"

specifies what to do with terms with selected tags. KEEP: terms without selected tags will be ignored. IGNORE: terms with selected tags will be ignored.

Default	KEEP

tagList={"string-1" <, "string-2", ...>}

specifies a list of tags. Unsupported tags trigger a warning message.

selectEntity={seltag}

specifies a list of entity types to be kept or ignored. If this parameter is specified, entities must be set to STD.

The seltag value can be one or more of the following:

opType="IGNORE" | "KEEP"

specifies what to do with terms with selected tags. KEEP: terms without selected tags will be ignored. IGNORE: terms with selected tags will be ignored.

Default	KEEP

tagList={"string-1" <, "string-2", ...>}

specifies a list of tags. Unsupported tags trigger a warning message.

selectPos={seltag}

specifies a list of part-of-speech tags to be kept or ignored.

The seltag value can be one or more of the following:

opType="IGNORE" | "KEEP"

specifies what to do with terms with selected tags. KEEP: terms without selected tags will be ignored. IGNORE: terms with selected tags will be ignored.

Default	KEEP

tagList={"string-1" <, "string-2", ...>}

specifies a list of tags. Unsupported tags trigger a warning message.

showDroppedTerms=true | false

specifies whether to include terms that have a keep status of N in the TERMS output table.

Default	false

startList={castable}

specifies the input CAS table that contains the terms that are to be kept for the analysis. If specified, the table must have the Term (varchar) variable. A Role (varchar) variable is optional.

For more information about specifying the startList parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

stemming=true | false

specifies whether stemming is to occur in parsing. When set to True, terms are evaluated to see if they belong to a common parent form and the information is added to the offset table.

Default	true

stopList={castable}

specifies the input CAS table that contains the terms to exclude from the analysis. If specified, the table must have the Term (varchar) variable. A Role (varchar) variable is optional.

For more information about specifying the stopList parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

synonyms={castable}

For more information about specifying the synonyms parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

tagging=true | false

specifies whether part-of-speech tagging is used in parsing.

Default	true

target="variable-name"

terms={casouttable}

For more information about specifying the terms parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

termStdMultiple=double

specifies how many standard deviations above the mean to set the term cutoff. This parameter requires a SAS Visual Text Analytics license.

Default	1
Range	0–10

termTopics={casouttable}

specifies the name of the output CAS table to contain the term-by-topic sparse matrix information.

For more information about specifying the termTopics parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

termWeight="ENTROPY" | "MI" | "NONE"

specifies how terms are weighted. Valid values are Entropy, None and MI (Mutual Information). MI requires a target variable in the offset table, which is generated by the tpParse action.

Alias	termWgt
Default	ENTROPY

* text="variable-name"

specifies the character variable in the documents table that contains the text to be processed.

Default	"text"

tolerance=double

specifies the stopping threshold for the iterative factorization algorithm. If 0 is specified the default value is used.

Default	1E-05
Range	0–1

topicDecision=true | false

Default	false

topics={casouttable}

specifies the output CAS table to contain the topics that are discovered.

For more information about specifying the topics parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

u={casouttable}

specifies the U matrix, which contains the left singular vectors. The matrix U is number of terms by k+1.

For more information about specifying the u parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

v={casouttable}

specifies the transpose of the matrix containing the right singular vectors. The matrix V is number of documents by k+1.

For more information about specifying the v parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

wordPro={casouttable}

specifies the table to contain the projections of the terms. If k dimensions of the SVD are found and the input data set contains n terms, this table will have n rows and k+1 columns.

For more information about specifying the wordPro parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

tmMine Action

Python Syntax
Summary: Input and Output Tables
Parameter Descriptions

Python Syntax

results=s.textMining.tmMine(

cellWeight="LOG" | "NONE",

child={

"caslib":"string",

"compress":True | False,

"indexVars":["variable-name-1" <, "variable-name-2", ...>],

"label":"string",

"lifetime":64-bit-integer,

"maxMemSize":64-bit-integer,

"memoryFormat":"DVR" | "INHERIT" | "STANDARD",

"name":"table-name",

"promote":True | False,

"replace":True | False,

"replication":integer,

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",

"threadBlockSize":64-bit-integer,

"timeStamp":"string",

"where":["string-1" <, "string-2", ...>]

},

complexTag=True | False,

copyVars=["variable-name-1" <, "variable-name-2", ...>],

defaultEntitiesPriority=integer,

docId="variable-name",

docPro={

"caslib":"string",

"compress":True | False,

"indexVars":["variable-name-1" <, "variable-name-2", ...>],

"label":"string",

"lifetime":64-bit-integer,

"maxMemSize":64-bit-integer,

"memoryFormat":"DVR" | "INHERIT" | "STANDARD",

"name":"table-name",

"promote":True | False,

"replace":True | False,

"replication":integer,

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",

"threadBlockSize":64-bit-integer,

"timeStamp":"string",

"where":["string-1" <, "string-2", ...>]

},

docStdMultiple=double,

documents={

"caslib":"string",

"computedOnDemand":True | False,

"computedVars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"computedVarsProgram":"string",

"dataSourceOptions":{"key-1":{any-list-or-data-type-1} <, "key-2":{any-list-or-data-type-2}, ...>},

"groupBy":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"groupByMode":"NOSORT" | "REDISTRIBUTE",

"importOptions":{"fileType":"ANY" | "AUDIO" | "AUTO" | "BASESAS" | "CSV" | "DELIMITED" | "DOCUMENT" | "DTA" | "ESP" | "EXCEL" | "FMT" | "HDAT" | "IMAGE" | "JMP" | "LASR" | "PARQUET" | "SOUND" | "SPSS" | "VIDEO" | "XLS", fileType-specific-parameters},

"name":"table-name",

"orderBy":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"singlePass":True | False,

"vars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"where":"where-expression",

"whereTable":{

"casLib":"string"

"dataSourceOptions":{adls_noreq-parameters | bigquery-parameters | cas_noreq-parameters | clouddex-parameters | db2-parameters | dnfs-parameters | esp-parameters | fedsvr-parameters | gcs_noreq-parameters | hadoop-parameters | hana-parameters | impala-parameters | informix-parameters | jdbc-parameters | mongodb-parameters | mysql-parameters | odbc-parameters | oracle-parameters | path-parameters | postgres-parameters | redshift-parameters | s3-parameters | sapiq-parameters | sforce-parameters | singlestore_standard-parameters | snowflake-parameters | spark-parameters | spde-parameters | sqlserver-parameters | ss_noreq-parameters | teradata-parameters | vertica-parameters | yellowbrick-parameters}

"name":"table-name"

"vars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>]

"where":"where-expression"

}

},

entities="NONE" | "STD",

exactDocPro=True | False,

exactWeight=True | False,

includeEmptyDocument=True | False,

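k=integer,

language="ARABIC" | "CHINESE" | "CROATIAN" | "CZECH" | "DANISH" | "DUTCH" | "ENGLISH" | "FARSI" | "FINNISH" | "FRENCH" | "GERMAN" | "GREEK" | "HEBREW" | "HINDI" | "HUNGARIAN" | "INDONESIAN" | "ITALIAN" | "JAPANESE" | "KAZAKH" | "KOREAN" | "NORWEGIAN" | "POLISH" | "PORTUGUESE" | "ROMANIAN" | "RUSSIAN" | "SLOVAK" | "SLOVENE" | "SPANISH" | "SWEDISH" | "TAGALOG" | "THAI" | "TURKISH" | "VIETNAMESE",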

legacyNames=True | False,

liti={

"caslib":"string",

"computedOnDemand":True | False,

"computedVars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"computedVarsProgram":"string",

"dataSourceOptions":{"key-1":{any-list-or-data-type-1} <, "key-2":{any-list-or-data-type-2}, ...>},

"groupBy":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"groupByMode":"NOSORT" | "REDISTRIBUTE",

"name":"table-name",

"orderBy":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"singlePass":True | False,

"vars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"where":"where-expression",

"whereTable":{

"casLib":"string"

"name":"table-name"

"vars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>]

"where":"where-expression"

}

},

maxK=integer,

multiterm={

"caslib":"string",

"computedOnDemand":True | False,

"computedVars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"computedVarsProgram":"string",

"dataSourceOptions":{"key-1":{any-list-or-data-type-1} <, "key-2":{any-list-or-data-type-2}, ...>},

"groupBy":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"groupByMode":"NOSORT" | "REDISTRIBUTE",

"name":"table-name",

"orderBy":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"singlePass":True | False,

"vars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"where":"where-expression",

"whereTable":{

"casLib":"string"

"name":"table-name"

"vars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>]

"where":"where-expression"

}

},

norm="ALL" | "DOC" | "NONE" | "WORD",

nounGroups=True | False,

nThreads=integer,

numLabels=integer,

offset={

"caslib":"string",

"compress":True | False,

"indexVars":["variable-name-1" <, "variable-name-2", ...>],

"label":"string",

"lifetime":64-bit-integer,

"maxMemSize":64-bit-integer,

"memoryFormat":"DVR" | "INHERIT" | "STANDARD",

"name":"table-name",

"promote":True | False,

"replace":True | False,

"replication":integer,

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",

"threadBlockSize":64-bit-integer,

"timeStamp":"string",

"where":["string-1" <, "string-2", ...>]

},

parent={

"caslib":"string",

"compress":True | False,

"indexVars":["variable-name-1" <, "variable-name-2", ...>],

"label":"string",

"lifetime":64-bit-integer,

"maxMemSize":64-bit-integer,

"memoryFormat":"DVR" | "INHERIT" | "STANDARD",

"name":"table-name",

"promote":True | False,

"replace":True | False,

"replication":integer,

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",

"threadBlockSize":64-bit-integer,

"timeStamp":"string",

"where":["string-1" <, "string-2", ...>]

},

parseConfig={

"caslib":"string",

"compress":True | False,

"indexVars":["variable-name-1" <, "variable-name-2", ...>],

"label":"string",

"lifetime":64-bit-integer,

"maxMemSize":64-bit-integer,

"memoryFormat":"DVR" | "INHERIT" | "STANDARD",

"name":"table-name",

"promote":True | False,

"replace":True | False,

"replication":integer,

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",

"threadBlockSize":64-bit-integer,

"timeStamp":"string",

"where":["string-1" <, "string-2", ...>]

},

reduce=integer,

resolution="HIGH" | "LOW" | "MED",

rotate="PROMAX" | "VARIMAX",

rowPivot=double,

s={

"caslib":"string",

"compress":True | False,

"indexVars":["variable-name-1" <, "variable-name-2", ...>],

"label":"string",

"lifetime":64-bit-integer,

"maxMemSize":64-bit-integer,

"memoryFormat":"DVR" | "INHERIT" | "STANDARD",

"name":"table-name",

"promote":True | False,

"replace":True | False,

"replication":integer,

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",

"threadBlockSize":64-bit-integer,

"timeStamp":"string",

"where":["string-1" <, "string-2", ...>]

},

saveState={

"caslib":"string",

"label":"string",

"lifetime":64-bit-integer,

"memoryFormat":"DVR" | "INHERIT" | "STANDARD",

"name":"table-name",

"promote":True | False,

"replace":True | False,

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE"

},

selectAttribute={

"opType":"IGNORE" | "KEEP",

"tagList":["string-1" <, "string-2", ...>]

},

selectEntity={

"opType":"IGNORE" | "KEEP",

"tagList":["string-1" <, "string-2", ...>]

},

selectPos={

"opType":"IGNORE" | "KEEP",

"tagList":["string-1" <, "string-2", ...>]

},

showDroppedTerms=True | False,

startList={

"caslib":"string",

"computedOnDemand":True | False,

"computedVars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"computedVarsProgram":"string",

"dataSourceOptions":{"key-1":{any-list-or-data-type-1} <, "key-2":{any-list-or-data-type-2}, ...>},

"groupBy":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"groupByMode":"NOSORT" | "REDISTRIBUTE",

"name":"table-name",

"orderBy":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"singlePass":True | False,

"vars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"where":"where-expression",

"whereTable":{

"casLib":"string"

"name":"table-name"

"vars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>]

"where":"where-expression"

}

},

stemming=True | False,

stopList={

"caslib":"string",

"computedOnDemand":True | False,

"computedVars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"computedVarsProgram":"string",

"dataSourceOptions":{"key-1":{any-list-or-data-type-1} <, "key-2":{any-list-or-data-type-2}, ...>},

"groupBy":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"groupByMode":"NOSORT" | "REDISTRIBUTE",

"name":"table-name",

"orderBy":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"singlePass":True | False,

"vars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"where":"where-expression",

"whereTable":{

"casLib":"string"

"name":"table-name"

"vars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>]

"where":"where-expression"

}

},

synonyms={

"caslib":"string",

"computedOnDemand":True | False,

"computedVars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"computedVarsProgram":"string",

"dataSourceOptions":{"key-1":{any-list-or-data-type-1} <, "key-2":{any-list-or-data-type-2}, ...>},

"groupBy":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"groupByMode":"NOSORT" | "REDISTRIBUTE",

"name":"table-name",

"orderBy":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"singlePass":True | False,

"vars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>],

"where":"where-expression",

"whereTable":{

"casLib":"string"

"name":"table-name"

"vars":[{

"format":"string",

"formattedLength":integer,

"label":"string",

"name":"variable-name",

"nfd":integer,

"nfl":integer

}<, {...}>]

"where":"where-expression"

}

},

tagging=True | False,

target="variable-name",

terms={

"caslib":"string",

"compress":True | False,

"indexVars":["variable-name-1" <, "variable-name-2", ...>],

"label":"string",

"lifetime":64-bit-integer,

"maxMemSize":64-bit-integer,

"memoryFormat":"DVR" | "INHERIT" | "STANDARD",

"name":"table-name",

"promote":True | False,

"replace":True | False,

"replication":integer,

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",

"threadBlockSize":64-bit-integer,

"timeStamp":"string",

"where":["string-1" <, "string-2", ...>]

},

termStdMultiple=double,

termTopics={

"caslib":"string",

"compress":True | False,

"indexVars":["variable-name-1" <, "variable-name-2", ...>],

"label":"string",

"lifetime":64-bit-integer,

"maxMemSize":64-bit-integer,

"memoryFormat":"DVR" | "INHERIT" | "STANDARD",

"name":"table-name",

"promote":True | False,

"replace":True | False,

"replication":integer,

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",

"threadBlockSize":64-bit-integer,

"timeStamp":"string",

"where":["string-1" <, "string-2", ...>]

},

termWeight="ENTROPY" | "MI" | "NONE",

text="variable-name",

tolerance=double,

topicDecision=True | False,

topics={

"caslib":"string",

"compress":True | False,

"indexVars":["variable-name-1" <, "variable-name-2", ...>],

"label":"string",

"lifetime":64-bit-integer,

"maxMemSize":64-bit-integer,

"memoryFormat":"DVR" | "INHERIT" | "STANDARD",

"name":"table-name",

"promote":True | False,

"replace":True | False,

"replication":integer,

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",

"threadBlockSize":64-bit-integer,

"timeStamp":"string",

"where":["string-1" <, "string-2", ...>]

},

u={

"caslib":"string",

"compress":True | False,

"indexVars":["variable-name-1" <, "variable-name-2", ...>],

"label":"string",

"lifetime":64-bit-integer,

"maxMemSize":64-bit-integer,

"memoryFormat":"DVR" | "INHERIT" | "STANDARD",

"name":"table-name",

"promote":True | False,

"replace":True | False,

"replication":integer,

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",

"threadBlockSize":64-bit-integer,

"timeStamp":"string",

"where":["string-1" <, "string-2", ...>]

},

v={

"caslib":"string",

"compress":True | False,

"indexVars":["variable-name-1" <, "variable-name-2", ...>],

"label":"string",

"lifetime":64-bit-integer,

"maxMemSize":64-bit-integer,

"memoryFormat":"DVR" | "INHERIT" | "STANDARD",

"name":"table-name",

"promote":True | False,

"replace":True | False,

"replication":integer,

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",

"threadBlockSize":64-bit-integer,

"timeStamp":"string",

"where":["string-1" <, "string-2", ...>]

},

wordPro={

"caslib":"string",

"compress":True | False,

"indexVars":["variable-name-1" <, "variable-name-2", ...>],

"label":"string",

"lifetime":64-bit-integer,

"maxMemSize":64-bit-integer,

"memoryFormat":"DVR" | "INHERIT" | "STANDARD",

"name":"table-name",

"promote":True | False,

"replace":True | False,

"replication":integer,

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE",

"threadBlockSize":64-bit-integer,

"timeStamp":"string",

"where":["string-1" <, "string-2", ...>]

}

)

* indicates a required parameter

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables
Parameter	Subparameter	Description
* documents	—	names the input CAS table of documents to be parsed. You must include a text variable specified with the text parameter and a document ID variable specified with the docId parameter.
liti	—	specifies the input CAS table that contains the LITI binary, which contains the predefined or custom concept definitions. The tmMine action can reference a concepts model that is compiled in the compileConcept action. For more information on how to do this, see the example, Referencing a Concepts Model in the tmMine Action. This parameter requires a SAS Visual Text Analytics license.
multiterm	—	specifies the name of the CAS table that contains a list of multi-word terms and their part-of-speech types. Each multi-word term is parsed as a single token.
startList	—	specifies the input CAS table that contains the terms that are to be kept for the analysis. If specified, the table must have the Term (varchar) variable. A Role (varchar) variable is optional.
stopList	—	specifies the input CAS table that contains the terms to exclude from the analysis. If specified, the table must have the Term (varchar) variable. A Role (varchar) variable is optional.
synonyms	—	specifies the input CAS table that contains user-defined synonyms to be used in the analysis. If specified, the table must have the following variables (all varchar): Term, Parent. Termrole and parentrole variables are optional.

Parameters for Creating Output Tables
Parameter	Subparameter	Description
child	—	specifies the name of the output CAS table to contain a compressed representation of the sparse term-by-document matrix with raw counts.
docPro	—	specifies the name of the table to contain the SVD projections of the documents.
offset	—	specifies the name of the output CAS table to contain the position information about the occurrences of child terms in the document collection. The maximum output length of a tokenized term in this table is 256 bytes. So tokens consisting of an extremely long sequence of letters, numbers and symbols will be truncated to less than or equal to that maximum value.
parent	—	specifies the name of the output CAS table to contain a compressed representation of the sparse term-by-document matrix.
parseConfig	—	specifies the name of the config CAS table to contain parsing configuration information.
s	—	specifies the S matrix, which is a diagonal matrix that is output in compressed form, with two variables and k rows. The variable _ID_ indicates the row and column of the entry and the variable S contains the singular values.
saveState	—	specifies the name of the table for saving the analytic score model. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.
termTopics	—	specifies the name of the output CAS table to contain the term-by-topic sparse matrix information.
terms	—	specifies the output CAS table to contain the summary information about the terms in the document collection. The maximum output length of a tokenized term is 256 bytes. So tokens consisting of an extremely long sequence of letters, numbers and symbols will be truncated to less than or equal to that maximum value. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.
topics	—	specifies the output CAS table to contain the topics that are discovered.
u	—	specifies the U matrix, which contains the left singular vectors. The matrix U is number of terms by k+1.
v	—	specifies the transpose of the matrix containing the right singular vectors. The matrix V is number of documents by k+1.
wordPro	—	specifies the table to contain the projections of the terms. If k dimensions of the SVD are found and the input data set contains n terms, this table will have n rows and k+1 columns.

Parameter Descriptions

cellWeight="LOG" | "NONE"

specifies how the elements in the term-by-document matrix (the parent output table) are weighted.

Alias	cellWgt
Default	LOG

child={casouttable}

specifies the name of the output CAS table to contain a compressed representation of the sparse term-by-document matrix with raw counts.

For more information about specifying the child parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

complexTag=True | False

Default	False

copyVars=["variable-name-1" <, "variable-name-2", ...>]

Aliases	keepVars, keepVar

defaultEntitiesPriority=integer

Default	1
Range	0–32

* docId="variable-name"

specifies the character or numeric variable on the documents table that contains the ID of each document.

Default	"DOC_ID"

docPro={casouttable}

specifies the name of the table to contain the SVD projections of the documents.

For more information about specifying the docPro parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

docStdMultiple=double

specifies how many standard deviations above the mean to set the document cutoff. This parameter requires a SAS Visual Text Analytics license.

Default	1
Range	0–10

* documents={castable}

names the input CAS table of documents to be parsed. You must include a text variable specified with the text parameter and a document ID variable specified with the docId parameter.

For more information about specifying the documents parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

entities="NONE" | "STD"

specifies whether to extract entities in parsing. If set to None, no entities are output. If set to STD, the standard entities are output.

Default	NONE

exactDocPro=True | False

specifies if the exact document projection values should be output. This parameter requires a SAS Visual Text Analytics license.

Default	True

exactWeight=True | False

specifies whether the exact entries in the u table are used in the topic computation. Otherwise, the values are rounded to three decimal places.

Alias	exactWeights
Default	False

includeEmptyDocument=True | False

specifies whether empty document indicators are included in the parent table. This parameter requires a SAS Visual Text Analytics license.

Default	False

k=integer

Alias	numTopics
Range	1–1000

language="ARABIC" | "CHINESE" | "CROATIAN" | "CZECH" | "DANISH" | "DUTCH" | "ENGLISH" | "FARSI" | "FINNISH" | "FRENCH" | "GERMAN" | "GREEK" | "HEBREW" | "HINDI" | "HUNGARIAN" | "INDONESIAN" | "ITALIAN" | "JAPANESE" | "KAZAKH" | "KOREAN" | "NORWEGIAN" | "POLISH" | "PORTUGUESE" | "ROMANIAN" | "RUSSIAN" | "SLOVAK" | "SLOVENE" | "SPANISH" | "SWEDISH" | "TAGALOG" | "THAI" | "TURKISH" | "VIETNAMESE"

specifies the language used in the text variable of the input document table.

Default	ENGLISH

legacyNames=True | False

specifies whether to use the legacy variable names on tables. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.

Default	False

liti={castable}

For more information about specifying the liti parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

maxK=integer

Default	10
Range	1–1000

multiterm={castable}

specifies the name of the CAS table that contains a list of multi-word terms and their part-of-speech types. Each multi-word term is parsed as a single token.

For more information about specifying the multiterm parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

norm="ALL" | "DOC" | "NONE" | "WORD"

Default	ALL

nounGroups=True | False

Default	True

nThreads=integer

specifies the number of threads to be used per node. The value must be an integer. When the value is 0, the number of threads equals the number of CPUs.

Default	8
Minimum value	0

numLabels=integer

specifies the number of terms to use in the descriptive label for each topic.

Default	5
Range	1–500

offset={casouttable}

For more information about specifying the offset parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

parent={casouttable}

specifies the name of the output CAS table to contain a compressed representation of the sparse term-by-document matrix.

For more information about specifying the parent parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

parseConfig={casouttable}

specifies the name of the config CAS table to contain parsing configuration information.

For more information about specifying the parseConfig parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

Alias	scoreConfig

reduce=integer

specifies the minimum number of documents a term should be in to be kept. The value must be an integer.

Default	10
Range	1–32767

resolution="HIGH" | "LOW" | "MED"

specifies the desired resolution level for the recommended number of dimensions to be extracted by the SVD.

Default	HIGH

rotate="PROMAX" | "VARIMAX"

specifies the type of rotation used to maximize the explanatory power of each topic. A VARIMAX rotation produces uncorrelated topics and a PROMAX rotation produces correlated topics.

Default	VARIMAX

rowPivot=double

Default	-1
Range	-1–1

s={casouttable}

For more information about specifying the s parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

saveState={casouttable}

specifies the name of the table for saving the analytic score model. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.

Long form	saveState={"name":"table-name"}
Shortcut form	saveState="table-name"

The casouttable value can be one or more of the following:

"caslib":"string"

specifies the name of the caslib for the output table.

"label":"string"

specifies the descriptive label to associate with the table.

"lifetime":64-bit-integer

specifies the number of seconds to keep the table in memory after it is last accessed. The table is dropped if it is not accessed for the specified number of seconds.

Default	0
Minimum value	0

"memoryFormat":"DVR" | "INHERIT" | "STANDARD"

specifies the memory format for the output table.

Default	INHERIT

DVR

use the duplicate value reduction memory format. This memory format can reduce the memory consumption and file size when the input data contains duplicate values.

INHERIT

STANDARD

use the standard memory format.

"name":"table-name"

specifies the name for the output table.

"promote":True | False

when set to True, adds the output table with a global scope. This enables other sessions to access the table, subject to access controls. The target caslib must also have a global scope.

Default	False

"replace":True | False

when set to True, overwrites an existing table that has the same name.

Default	False

"tableRedistUpPolicy":"DEFER" | "NOREDIST" | "REBALANCE"

Specifies the Table Redistribution Policy when the number of worker pods increases on a running CAS server.

DEFER

Defer redistribution policy selection to higher-level entity.

NOREDIST

Do not redistribute table data when the number of worker pods changes on a running CAS server.

REBALANCE

Rebalance table data when the number of worker pods changes on a running CAS server.

selectAttribute={seltag}

specifies a list of attribute types to be kept or ignored.

The seltag value can be one or more of the following:

"opType":"IGNORE" | "KEEP"

specifies what to do with terms with selected tags. KEEP: terms without selected tags will be ignored. IGNORE: terms with selected tags will be ignored.

Default	KEEP

specifies a list of tags. Unsupported tags trigger a warning message.

selectEntity={seltag}

specifies a list of entity types to be kept or ignored. If this parameter is specified, entities must be set to STD.

The seltag value can be one or more of the following:

"opType":"IGNORE" | "KEEP"

specifies what to do with terms with selected tags. KEEP: terms without selected tags will be ignored. IGNORE: terms with selected tags will be ignored.

Default	KEEP

specifies a list of tags. Unsupported tags trigger a warning message.

selectPos={seltag}

specifies a list of part-of-speech tags to be kept or ignored.

The seltag value can be one or more of the following:

"opType":"IGNORE" | "KEEP"

specifies what to do with terms with selected tags. KEEP: terms without selected tags will be ignored. IGNORE: terms with selected tags will be ignored.

Default	KEEP

specifies a list of tags. Unsupported tags trigger a warning message.

showDroppedTerms=True | False

specifies whether to include terms that have a keep status of N in the TERMS output table.

Default	False

startList={castable}

specifies the input CAS table that contains the terms that are to be kept for the analysis. If specified, the table must have the Term (varchar) variable. A Role (varchar) variable is optional.

For more information about specifying the startList parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

stemming=True | False

specifies whether stemming is to occur in parsing. When set to True, terms are evaluated to see if they belong to a common parent form and the information is added to the offset table.

Default	True

stopList={castable}

specifies the input CAS table that contains the terms to exclude from the analysis. If specified, the table must have the Term (varchar) variable. A Role (varchar) variable is optional.

For more information about specifying the stopList parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

synonyms={castable}

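specifies the input CAS table that contains user-defined synonyms to be used in the analysis. If specified, the table must have the following variables (all varchar): Term, Parent. Termrole and parentrole variables are optional.

For more information about specifying the synonyms parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).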

tagging=True | False

specifies whether part-of-speech tagging is used in parsing.

Default	True

target="variable-name"

terms={casouttable}

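specifies the output CAS table to contain the summary information about the terms in the document collection. The maximum output length of a tokenized term is 256 bytes. So tokens consisting of an extremely long sequence of letters, numbers and symbols will be truncated to less than or equal to that maximum value. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.

For more information about specifying the terms parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).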

termStdMultiple=double

specifies how many standard deviations above the mean to set the term cutoff. This parameter requires a SAS Visual Text Analytics license.

Default	1
Range	0–10

termTopics={casouttable}

specifies the name of the output CAS table to contain the term-by-topic sparse matrix information.

For more information about specifying the termTopics parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

termWeight="ENTROPY" | "MI" | "NONE"

specifies how terms are weighted. Valid values are Entropy, None and MI (Mutual Information). MI requires a target variable in the offset table, which is generated by the tpParse action.

Alias	termWgt
Default	ENTROPY

* text="variable-name"

specifies the character variable in the documents table that contains the text to be processed.

Default	"text"

tolerance=double

specifies the stopping threshold for the iterative factorization algorithm. If 0 is specified, the default value is used.

Default	1E-05
Range	0–1

topicDecision=True | False

Default	False

topics={casouttable}

specifies the output CAS table to contain the topics that are discovered.

For more information about specifying the topics parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

u={casouttable}

specifies the U matrix, which contains the left singular vectors. The matrix U is number of terms by k+1.

For more information about specifying the u parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

v={casouttable}

specifies the transpose of the matrix containing the right singular vectors. The matrix V is number of documents by k+1.

For more information about specifying the v parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

wordPro={casouttable}

specifies the table to contain the projections of the terms. If k dimensions of the SVD are found and the input data set contains n terms, this table will have n rows and k+1 columns.

For more information about specifying the wordPro parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

tmMine Action

R Syntax
Summary: Input and Output Tables
Parameter Descriptions

R Syntax

results <- cas.textMining.tmMine(s,

cellWeight="LOG" | "NONE",

child=list(

caslib="string",

compress=TRUE | FALSE,

indexVars=list("variable-name-1" <, "variable-name-2", ...>),

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where=list("string-1" <, "string-2", ...>)

complexTag=TRUE | FALSE,

copyVars=list("variable-name-1" <, "variable-name-2", ...>),

defaultEntitiesPriority=integer,

docId="variable-name",

docPro=list(

caslib="string",

compress=TRUE | FALSE,

indexVars=list("variable-name-1" <, "variable-name-2", ...>),

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where=list("string-1" <, "string-2", ...>)

docStdMultiple=double,

documents=list(

caslib="string",

computedOnDemand=TRUE | FALSE,

computedVars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

computedVarsProgram="string",

dataSourceOptions=list(key-1=list(any-list-or-data-type-1) <, key-2=list(any-list-or-data-type-2), ...>),

groupBy=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

groupByMode="NOSORT" | "REDISTRIBUTE",

name="table-name",

orderBy=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

singlePass=TRUE | FALSE,

vars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

where="where-expression",

whereTable=list(

casLib="string"

name="table-name"

vars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>)

where="where-expression"

)

entities="NONE" | "STD",

exactDocPro=TRUE | FALSE,

exactWeight=TRUE | FALSE,

includeEmptyDocument=TRUE | FALSE,

k=integer,

legacyNames=TRUE | FALSE,

liti=list(

caslib="string",

computedOnDemand=TRUE | FALSE,

computedVars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

computedVarsProgram="string",

dataSourceOptions=list(key-1=list(any-list-or-data-type-1) <, key-2=list(any-list-or-data-type-2), ...>),

groupBy=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

groupByMode="NOSORT" | "REDISTRIBUTE",

name="table-name",

orderBy=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

singlePass=TRUE | FALSE,

vars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

where="where-expression",

whereTable=list(

casLib="string"

name="table-name"

vars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>)

where="where-expression"

)

maxK=integer,

multiterm=list(

caslib="string",

computedOnDemand=TRUE | FALSE,

computedVars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

computedVarsProgram="string",

dataSourceOptions=list(key-1=list(any-list-or-data-type-1) <, key-2=list(any-list-or-data-type-2), ...>),

groupBy=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

groupByMode="NOSORT" | "REDISTRIBUTE",

name="table-name",

orderBy=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

singlePass=TRUE | FALSE,

vars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

where="where-expression",

whereTable=list(

casLib="string"

name="table-name"

vars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>)

where="where-expression"

)

norm="ALL" | "DOC" | "NONE" | "WORD",

nounGroups=TRUE | FALSE,

nThreads=integer,

numLabels=integer,

offset=list(

caslib="string",

compress=TRUE | FALSE,

indexVars=list("variable-name-1" <, "variable-name-2", ...>),

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where=list("string-1" <, "string-2", ...>)

parent=list(

caslib="string",

compress=TRUE | FALSE,

indexVars=list("variable-name-1" <, "variable-name-2", ...>),

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where=list("string-1" <, "string-2", ...>)

parseConfig=list(

caslib="string",

compress=TRUE | FALSE,

indexVars=list("variable-name-1" <, "variable-name-2", ...>),

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where=list("string-1" <, "string-2", ...>)

reduce=integer,

resolution="HIGH" | "LOW" | "MED",

rotate="PROMAX" | "VARIMAX",

rowPivot=double,

s=list(

caslib="string",

compress=TRUE | FALSE,

indexVars=list("variable-name-1" <, "variable-name-2", ...>),

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where=list("string-1" <, "string-2", ...>)

saveState=list(

caslib="string",

label="string",

lifetime=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

selectAttribute=list(

opType="IGNORE" | "KEEP",

tagList=list("string-1" <, "string-2", ...>)

selectEntity=list(

opType="IGNORE" | "KEEP",

tagList=list("string-1" <, "string-2", ...>)

selectPos=list(

opType="IGNORE" | "KEEP",

tagList=list("string-1" <, "string-2", ...>)

showDroppedTerms=TRUE | FALSE,

startList=list(

caslib="string",

computedOnDemand=TRUE | FALSE,

computedVars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

computedVarsProgram="string",

dataSourceOptions=list(key-1=list(any-list-or-data-type-1) <, key-2=list(any-list-or-data-type-2), ...>),

groupBy=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

groupByMode="NOSORT" | "REDISTRIBUTE",

name="table-name",

orderBy=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

singlePass=TRUE | FALSE,

vars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

where="where-expression",

whereTable=list(

casLib="string"

name="table-name"

vars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>)

where="where-expression"

)

stemming=TRUE | FALSE,

stopList=list(

caslib="string",

computedOnDemand=TRUE | FALSE,

computedVars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

computedVarsProgram="string",

dataSourceOptions=list(key-1=list(any-list-or-data-type-1) <, key-2=list(any-list-or-data-type-2), ...>),

groupBy=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

groupByMode="NOSORT" | "REDISTRIBUTE",

name="table-name",

orderBy=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

singlePass=TRUE | FALSE,

vars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

where="where-expression",

whereTable=list(

casLib="string"

name="table-name"

vars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>)

where="where-expression"

)

synonyms=list(

caslib="string",

computedOnDemand=TRUE | FALSE,

computedVars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

computedVarsProgram="string",

dataSourceOptions=list(key-1=list(any-list-or-data-type-1) <, key-2=list(any-list-or-data-type-2), ...>),

groupBy=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

groupByMode="NOSORT" | "REDISTRIBUTE",

name="table-name",

orderBy=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

singlePass=TRUE | FALSE,

vars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>),

where="where-expression",

whereTable=list(

casLib="string"

name="table-name"

vars=list( list(

format="string",

formattedLength=integer,

label="string",

name="variable-name",

nfd=integer,

nfl=integer

) <, list(...)>)

where="where-expression"

)

tagging=TRUE | FALSE,

target="variable-name",

terms=list(

caslib="string",

compress=TRUE | FALSE,

indexVars=list("variable-name-1" <, "variable-name-2", ...>),

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where=list("string-1" <, "string-2", ...>)

termStdMultiple=double,

termTopics=list(

caslib="string",

compress=TRUE | FALSE,

indexVars=list("variable-name-1" <, "variable-name-2", ...>),

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where=list("string-1" <, "string-2", ...>)

termWeight="ENTROPY" | "MI" | "NONE",

text="variable-name",

tolerance=double,

topicDecision=TRUE | FALSE,

topics=list(

caslib="string",

compress=TRUE | FALSE,

indexVars=list("variable-name-1" <, "variable-name-2", ...>),

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where=list("string-1" <, "string-2", ...>)

u=list(

caslib="string",

compress=TRUE | FALSE,

indexVars=list("variable-name-1" <, "variable-name-2", ...>),

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where=list("string-1" <, "string-2", ...>)

v=list(

caslib="string",

compress=TRUE | FALSE,

indexVars=list("variable-name-1" <, "variable-name-2", ...>),

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where=list("string-1" <, "string-2", ...>)

wordPro=list(

caslib="string",

compress=TRUE | FALSE,

indexVars=list("variable-name-1" <, "variable-name-2", ...>),

label="string",

lifetime=64-bit-integer,

maxMemSize=64-bit-integer,

memoryFormat="DVR" | "INHERIT" | "STANDARD",

name="table-name",

promote=TRUE | FALSE,

replace=TRUE | FALSE,

replication=integer,

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE",

threadBlockSize=64-bit-integer,

timeStamp="string",

where=list("string-1" <, "string-2", ...>)

)

* indicates a required parameter

Summary: Input and Output Tables

If a row includes a subparameter, you can specify the name, caslib, and so on in the subparameter. Otherwise, you can specify the name, caslib, and so on in the parameter.

Parameters for Reading Input Tables
Parameter	Subparameter	Description
* documents	—	names the input CAS table of documents to be parsed. You must include a text variable specified with textVar and a document ID variable specified with docIdVar.
liti	—	specifies the input CAS table that contains the LITI binary, which contains the predefined or custom concept definitions. The tmMine action can reference a concepts model that is compiled in the compileConcept action. For more information on how to do this, see the example, Referencing a Concepts Model in the tmMine Action. This parameter requires a SAS Visual Text Analytics license.
multiterm	—	specifies the name of the CAS table that contains a list of multi-word terms and their part-of-speech types. Each multi-word term is parsed as a single token.
startList	—	specifies the input CAS table that contains the terms that are to be kept for the analysis. If specified, the table must have the Term (varchar) variable. A Role (varchar) variable is optional.
stopList	—	specifies the input CAS table that contains the terms to exclude from the analysis. If specified, the table must have the Term (varchar) variable. A Role (varchar) variable is optional.
synonyms	—	specifies the input CAS table that contains user-defined synonyms to be used in the analysis. If specified, the table must have the following variables (all varchar): Term, Parent. Termrole and parentrole variables are optional.

Parameters for Creating Output Tables
Parameter	Subparameter	Description
child	—	specifies the name of the output CAS table to contain a compressed representation of the sparse term-by-document matrix with raw counts.
docPro	—	specifies the name of the table to contain the SVD projections of the documents.
offset	—	specifies the name of the output CAS table to contain the position information about the occurrences of child terms in the document collection. The maximum output length of a tokenized term in this table is 256 bytes. So tokens consisting of an extremely long sequence of letters, numbers and symbols will be truncated to less than or equal to that maximum value.
parent	—	specifies the name of the output CAS table to contain a compressed representation of the sparse term-by-document matrix.
parseConfig	—	specifies the name of the config CAS table to contain parsing configuration information.
s	—	specifies the S matrix, which is a diagonal matrix that is output in compressed form, with two variables and k rows. The variable _ID_ indicates the row and column of the entry and the variable S contains the singular values.
saveState	—	specifies the name of the table for saving the analytic score model. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.
termTopics	—	specifies the name of the output CAS table to contain the term-by-topic sparse matrix information.
terms	—	specifies the output CAS table to contain the summary information about the terms in the document collection. The maximum output length of a tokenized term is 256 bytes. So tokens consisting of an extremely long sequence of letters, numbers and symbols will be truncated to less than or equal to that maximum value. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.
topics	—	specifies the output CAS table to contain the topics that are discovered.
u	—	specifies the U matrix, which contains the left singular vectors. The matrix U is number of terms by k+1.
v	—	specifies the transpose of the matrix containing the right singular vectors. The matrix V is number of documents by k+1.
wordPro	—	specifies the table to contain the projections of the terms. If k dimensions of the SVD are found and the input data set contains n terms, this table will have n rows and k+1 columns.

Parameter Descriptions

cellWeight="LOG" | "NONE"

specifies how the elements in the term-by-document matrix (the parent output table) are weighted.

Alias	cellWgt
Default	LOG

child=list(casouttable)

specifies the name of the output CAS table to contain a compressed representation of the sparse term-by-document matrix with raw counts.

For more information about specifying the child parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

complexTag=TRUE | FALSE

Default	FALSE

copyVars=list("variable-name-1" <, "variable-name-2", ...>)

Aliases	keepVars, keepVar

defaultEntitiesPriority=integer

Default	1
Range	0–32

* docId="variable-name"

specifies the character or numeric variable on the documents table that contains the ID of each document.

Default	"DOC_ID"

docPro=list(casouttable)

specifies the name of the table to contain the SVD projections of the documents.

For more information about specifying the docPro parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

docStdMultiple=double

specifies how many standard deviations above the mean to set the document cutoff. This parameter requires a SAS Visual Text Analytics license.

Default	1
Range	0–10

* documents=list(castable)

names the input CAS table of documents to be parsed. You must include a text variable specified with the text parameter and a document ID variable specified with the docId parameter.

For more information about specifying the documents parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

entities="NONE" | "STD"

specifies whether to extract entities in parsing. If set to None, no entities are output. If set to STD, the standard entities are output.

Default	NONE

exactDocPro=TRUE | FALSE

specifies if the exact document projection values should be output. This parameter requires a SAS Visual Text Analytics license.

Default	TRUE

exactWeight=TRUE | FALSE

specifies whether the exact entries on the u table are to be used in the topic computation. Otherwise, the values are rounded to three decimal places.

Alias	exactWeights
Default	FALSE

includeEmptyDocument=TRUE | FALSE

specifies whether empty document indicators are included on the parent table. This parameter requires a SAS Visual Text Analytics license.

Default	FALSE

k=integer

Alias	numTopics
Range	1–1000

language="ARABIC" | "CHINESE" | "CROATIAN" | "CZECH" | "DANISH" | "DUTCH" | "ENGLISH" | "FARSI" | "FINNISH" | "FRENCH" | "GERMAN" | "GREEK" | "HEBREW" | "HINDI" | "HUNGARIAN" | "INDONESIAN" | "ITALIAN" | "JAPANESE" | "KAZAKH" | "KOREAN" | "NORWEGIAN" | "POLISH" | "PORTUGUESE" | "ROMANIAN" | "RUSSIAN" | "SLOVAK" | "SLOVENE" | "SPANISH" | "SWEDISH" | "TAGALOG" | "THAI" | "TURKISH" | "VIETNAMESE"

specifies the language used in the text variable of the input document table.

Default	ENGLISH

legacyNames=TRUE | FALSE

specifies whether to use the legacy variable names on tables. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.

Default	FALSE

liti=list(castable)

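specifies the input CAS table that contains the LITI binary, which contains the predefined or custom concept definitions. The tmMine action can reference a concepts model that is compiled in the compileConcept action. For more information on how to do this, see the example, Referencing a Concepts Model in the tmMine Action. This parameter requires a SAS Visual Text Analytics license.

For more information about specifying the liti parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).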

maxK=integer

Default	10
Range	1–1000

multiterm=list(castable)

specifies the name of the CAS table that contains a list of multi-word terms and their part-of-speech types. Each multi-word term is parsed as a single token.

For more information about specifying the multiterm parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

norm="ALL" | "DOC" | "NONE" | "WORD"

Default	ALL

nounGroups=TRUE | FALSE

Default	TRUE

nThreads=integer

specifies the number of threads to be used per node. The value must be an integer. When the value is 0, the number of threads equals the number of CPUs.

Default	8
Minimum value	0

numLabels=integer

specifies the number of terms to use in the descriptive label for each topic.

Default	5
Range	1–500

offset=list(casouttable)

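specifies the name of the output CAS table to contain the position information about the occurrences of child terms in the document collection. The maximum output length of a tokenized term in this table is 256 bytes. So tokens consisting of an extremely long sequence of letters, numbers and symbols will be truncated to less than or equal to that maximum value.

For more information about specifying the offset parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).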

parent=list(casouttable)

specifies the name of the output CAS table to contain a compressed representation of the sparse term-by-document matrix.

For more information about specifying the parent parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

parseConfig=list(casouttable)

specifies the name of the config CAS table to contain parsing configuration information.

For more information about specifying the parseConfig parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

Alias	scoreConfig

reduce=integer

specifies the minimum number of documents a term should be in to be kept. The value must be an integer.

Default	10
Range	1–32767

resolution="HIGH" | "LOW" | "MED"

specifies the desired resolution level for the recommended number of dimensions to be extracted by the SVD.

Default	HIGH

rotate="PROMAX" | "VARIMAX"

specifies the type of rotation used to maximize the explanatory power of each topic. A VARIMAX rotation produces uncorrelated topics and a PROMAX rotation produces correlated topics.

Default	VARIMAX

rowPivot=double

Default	-1
Range	-1–1

s=list(casouttable)

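specifies the S matrix, which is a diagonal matrix that is output in compressed form, with two variables and k rows. The variable _ID_ indicates the row and column of the entry and the variable S contains the singular values.

For more information about specifying the s parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).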

saveState=list(casouttable)

specifies the name of the table for saving the analytic score model. This parameter requires a SAS Visual Text Analytics license or a SAS Visual Data Mining and Machine Learning license.

Long form	saveState=list(name="table-name")
Shortcut form	saveState="table-name"

The casouttable value can be one or more of the following:

caslib="string"

specifies the name of the caslib for the output table.

label="string"

specifies the descriptive label to associate with the table.

lifetime=64-bit-integer

specifies the number of seconds to keep the table in memory after it is last accessed. The table is dropped if it is not accessed for the specified number of seconds.

Default	0
Minimum value	0

memoryFormat="DVR" | "INHERIT" | "STANDARD"

specifies the memory format for the output table.

Default	INHERIT

DVR

use the duplicate value reduction memory format. This memory format can reduce the memory consumption and file size when the input data contains duplicate values.

INHERIT

STANDARD

use the standard memory format.

name="table-name"

specifies the name for the output table.

promote=TRUE | FALSE

when set to True, adds the output table with a global scope. This enables other sessions to access the table, subject to access controls. The target caslib must also have a global scope.

Default	FALSE

replace=TRUE | FALSE

when set to True, overwrites an existing table that has the same name.

Default	FALSE

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

specifies the table redistribution policy to use when the number of worker pods increases on a running CAS server.

DEFER

Defer redistribution policy selection to higher-level entity.

NOREDIST

Do not redistribute table data when the number of worker pods changes on a running CAS server.

REBALANCE

Rebalance table data when the number of worker pods changes on a running CAS server.

selectAttribute=list(seltag)

specifies a list of attribute types to be kept or ignored.

The seltag value can be one or more of the following:

opType="IGNORE" | "KEEP"

specifies what to do with terms with selected tags. KEEP: terms without selected tags will be ignored. IGNORE: terms with selected tags will be ignored.

Default	KEEP

* tagList={"string-1" <, "string-2", ...>}

specifies a list of tags. Unsupported tags trigger a warning message.

selectEntity=list(seltag)

specifies a list of entity types to be kept or ignored. If this parameter is specified, entities must be set to STD.

The seltag value can be one or more of the following:

opType="IGNORE" | "KEEP"

specifies what to do with terms with selected tags. KEEP: terms without selected tags will be ignored. IGNORE: terms with selected tags will be ignored.

Default	KEEP

* tagList={"string-1" <, "string-2", ...>}

specifies a list of tags. Unsupported tags trigger a warning message.

selectPos=list(seltag)

specifies a list of part-of-speech tags to be kept or ignored.

The seltag value can be one or more of the following:

opType="IGNORE" | "KEEP"

specifies what to do with terms with selected tags. KEEP: terms without selected tags will be ignored. IGNORE: terms with selected tags will be ignored.

Default	KEEP

* tagList={"string-1" <, "string-2", ...>}

specifies a list of tags. Unsupported tags trigger a warning message.

showDroppedTerms=TRUE | FALSE

specifies whether to include terms that have a keep status of N in the TERMS output table.

Default	FALSE

startList=list(castable)

specifies the input CAS table that contains the terms that are to be kept for the analysis. If specified, the table must have the Term (varchar) variable. A Role (varchar) variable is optional.

For more information about specifying the startList parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

stemming=TRUE | FALSE

specifies whether stemming is to occur in parsing. When set to True, terms are evaluated to see if they belong to a common parent form and the information is added to the offset table.

Default	TRUE

stopList=list(castable)

specifies the input CAS table that contains the terms to exclude from the analysis. If specified, the table must have the Term (varchar) variable. A Role (varchar) variable is optional.

For more information about specifying the stopList parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

synonyms=list(castable)

For more information about specifying the synonyms parameter, see the common castable (Form 1) parameter (Appendix A: Common Parameters).

tagging=TRUE | FALSE

specifies whether part-of-speech tagging is used in parsing.

Default	TRUE

target="variable-name"

terms=list(casouttable)

For more information about specifying the terms parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

termStdMultiple=double

specifies how many standard deviations above the mean to set the term cutoff. This parameter requires a SAS Visual Text Analytics license.

Default	1
Range	0–10

termTopics=list(casouttable)

specifies the name of the output CAS table to contain the term-by-topic sparse matrix information.

For more information about specifying the termTopics parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

termWeight="ENTROPY" | "MI" | "NONE"

specifies how terms are weighted. Valid values are Entropy, None and MI (Mutual Information). MI requires a target variable in the offset table, which is generated by the tpParse action.

Alias	termWgt
Default	ENTROPY

* text="variable-name"

specifies the character variable in the documents table that contains the text to be processed.

Default	"text"

tolerance=double

specifies the stopping threshold for the iterative factorization algorithm. If 0 is specified, the default value is used.

Default	1E-05
Range	0–1

topicDecision=TRUE | FALSE

Default	FALSE

topics=list(casouttable)

specifies the output CAS table to contain the topics that are discovered.

For more information about specifying the topics parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

u=list(casouttable)

specifies the U matrix, which contains the left singular vectors. The matrix U has one row per term and k+1 columns.

For more information about specifying the u parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

v=list(casouttable)

specifies the transpose of the matrix containing the right singular vectors. The matrix V has one row per document and k+1 columns.

For more information about specifying the v parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

wordPro=list(casouttable)

specifies the table to contain the projections of the terms. If k dimensions of the SVD are found and the input data set contains n terms, this table will have n rows and k+1 columns.

For more information about specifying the wordPro parameter, see the common casouttable (Form 1) parameter (Appendix A: Common Parameters).

Last updated: November 23, 2025

tmMine Action

CASL Syntax

Summary: Input and Output Tables

Parameter Descriptions

cellWeight="LOG" | "NONE"

child={casouttable}

complexTag=TRUE | FALSE

copyVars={"variable-name-1" <, "variable-name-2", ...>}

defaultEntitiesPriority=integer

* docId="variable-name"

docPro={casouttable}

docStdMultiple=double

* documents={castable}

entities="NONE" | "STD"

exactDocPro=TRUE | FALSE

exactWeight=TRUE | FALSE

includeEmptyDocument=TRUE | FALSE

k=integer

legacyNames=TRUE | FALSE

liti={castable}

maxK=integer

multiterm={castable}

norm="ALL" | "DOC" | "NONE" | "WORD"

nounGroups=TRUE | FALSE

nThreads=integer

numLabels=integer

offset={casouttable}

parent={casouttable}

parseConfig={casouttable}

reduce=integer

resolution="HIGH" | "LOW" | "MED"

rotate="PROMAX" | "VARIMAX"

rowPivot=double

s={casouttable}

saveState={casouttable}

caslib="string"

label="string"

lifetime=64-bit-integer

memoryFormat="DVR" | "INHERIT" | "STANDARD"

DVR

INHERIT

STANDARD

name="table-name"

promote=TRUE | FALSE

replace=TRUE | FALSE

tableRedistUpPolicy="DEFER" | "NOREDIST" | "REBALANCE"

DEFER

NOREDIST

REBALANCE

selectAttribute={seltag}

opType="IGNORE" | "KEEP"

* tagList={"string-1" <, "string-2", ...>}

selectEntity={seltag}

opType="IGNORE" | "KEEP"

* tagList={"string-1" <, "string-2", ...>}

selectPos={seltag}

opType="IGNORE" | "KEEP"

* tagList={"string-1" <, "string-2", ...>}

showDroppedTerms=TRUE | FALSE

startList={castable}

stemming=TRUE | FALSE

stopList={castable}

synonyms={castable}

tagging=TRUE | FALSE

target="variable-name"

terms={casouttable}

termStdMultiple=double

termTopics={casouttable}

termWeight="ENTROPY" | "MI" | "NONE"

* text="variable-name"

tolerance=double

topicDecision=TRUE | FALSE

topics={casouttable}

u={casouttable}

v={casouttable}

wordPro={casouttable}

tmMine Action

Lua Syntax

Summary: Input and Output Tables

Parameter Descriptions