SimpleTokenizer

Value Members

final def !=(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def !=(arg0: Any): Boolean

Definition Classes
Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def ==(arg0: Any): Boolean

Definition Classes
Any
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def hashCode(): Int

Definition Classes
AnyRef → Any
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def shaping(tokens: Array[Float], sequenceLen: Int, trunc: String = "pre"): Array[Float]

Shape the token sequence to the specified length.
Shape the token sequence to the specified length. The sequence is either padded or truncated to reach that length.
sequenceLen
the desired sequence length.
trunc
truncation mode, either "pre" or "post" (defaults to "pre").
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Definition Classes
AnyRef → Any
def toTokens(text: String, word2Meta: Map[String, WordMeta]): Array[Float]

Transform sample text into tokens, ignoring unknown tokens.
Transform sample text into tokens, ignoring unknown tokens.
word2Meta
map from each included (known) word to its WordMeta; indicates which words are included.
def toTokens(text: String, lower: Boolean = true): Array[String]

Simple tokenizer to split text into separated tokens.
Simple tokenizer to split text into separated tokens.
text
text to be split.
lower
convert to lower case or not.
returns
An array of separated tokens.
def vectorization(tokens: Array[Float], embeddingSize: Int, word2Vec: Map[Float, Array[Float]]): Array[Array[Float]]

Transform tokens into their pre-trained vectors.
Transform tokens into their pre-trained vectors.
embeddingSize
size of the pre-trained vector
word2Vec
pre-trained word2Vec map from a token to its vector.
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

object SimpleTokenizer

Value Members

final def !=(arg0: AnyRef): Boolean

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: AnyRef): Boolean

final def ==(arg0: Any): Boolean

final def asInstanceOf[T0]: T0

def clone(): AnyRef

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

final def getClass(): Class[_]

def hashCode(): Int

final def isInstanceOf[T0]: Boolean

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

def shaping(tokens: Array[Float], sequenceLen: Int, trunc: String = "pre"): Array[Float]

final def synchronized[T0](arg0: ⇒ T0): T0

def toString(): String

def toTokens(text: String, word2Meta: Map[String, WordMeta]): Array[Float]

def toTokens(text: String, lower: Boolean = true): Array[String]

def vectorization(tokens: Array[Float], embeddingSize: Int, word2Vec: Map[Float, Array[Float]]): Array[Array[Float]]

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

Inherited from AnyRef

Inherited from Any

Ungrouped