In the MAYZ toolkit, data such as parse trees and derivations are represented as feature structures. This page describes the type definitions of these feature structures.
%% information on one word of the input
word <- [bot] +
  [INPUT\string(0),        %% input word itself
   SURFACE\string(1),      %% preprocessed input word
   BASE\string(2),         %% base form of the input word
   INPUT_POS\string(5),    %% input POS itself
   POS\string(6),          %% preprocessed POS
   BASE_POS\string(7),     %% POS of the base form of the input word
   POSITION\integer(10)    %% the position of the input word
  ].
A phrase-structure tree (parse tree) of an input treebank is represented with the following feature structures.
%% head/argument/modifier marks
%% The marks below 'head_mark' combine two distinctions:
%%   head vs. non-head (argument or modifier), and empty vs. non-empty.
head_mark <- [bot]. %% supertype of all marks
non_head <- [head_mark]. %% anything other than 'head'
argument <- [non_head]. %% non-head that is an argument
modifier <- [non_head]. %% non-head that is a modifier
empty <- [head_mark]. %% mark for empty trees
non_empty <- [head_mark]. %% mark for non-empty trees
head <- [head_mark, non_empty]. %% head (declared only as non-empty)
non_head_empty <- [non_head, empty]. %% empty non-head
non_head_non_empty <- [non_head, non_empty]. %% non-empty non-head
argument_empty <- [argument, non_head_empty]. %% empty argument
argument_non_empty <- [argument, non_head_non_empty]. %% non-empty argument
modifier_empty <- [modifier, non_head_empty]. %% empty modifier
modifier_non_empty <- [modifier, non_head_non_empty]. %% non-empty modifier
%% representation of PropBank-style annotations

%% common supertype of argument and predicate annotations
propbank_label <- [bot] +
  [PROP_ID\integer(0)].      %% ID number, unique within a sentence

%% annotation for an argument
propbank_arg <- [propbank_label] +
  [ARG_POS\string(1)].       %% argument label ("ARG1", ...)

%% annotation for a predicate
propbank_rel <- [propbank_label] +
  [PRED_NAME\string(1),      %% name of the predicate
   PRED_SEM_ID\string(2)].   %% semantic ID of the predicate
%% list whose elements are 'propbank_label' structures
prop_list <- [list].
prop_cons <- [prop_list, cons] +
  [hd\propbank_label,        %% first element
   tl\prop_list].            %% remaining elements
prop_nil <- [prop_list, nil].
%% a node in a parse tree (features shared by every kind of node)
tree_node <- [bot] +
  [SYM\string(0),            %% symbol of the node
   FUNC\list(1),             %% function labels (SBJ, TPC, etc.)
   HEAD_MARK\head_mark(2),   %% head/argument/modifier mark
   PROP_LIST\prop_list(5),   %% PropBank-style annotations
   NODE_SIGN\bot(10),        %% sign corresponding to the node
   ANNOT\bot(11)             %% other annotations
  ].
%% nonterminal node
tree_node_nts <- [tree_node] +
  [ID\list(3),               %% coindex ID assigned to the node
   SCHEMA_NAME\bot(4)        %% schema name
  ].

%% terminal node
tree_node_term <- [tree_node] +
  [WORD\word(3)].            %% word corresponding to the node

%% empty terminal node
tree_node_empty <- [tree_node] +
  [COIND\list(3),            %% coindex IDs
   EMPTY_WORD\string(4),     %% string representing the empty word
   COIND_NODE\list(5)        %% coindexed nodes
  ].
%% parse tree
tree_base <- [bot] +
  [TREE_NODE\tree_node(0)].  %% the node of the tree

%% a terminal or nonterminal tree
tree <- [tree_base].

%% nonterminal tree
tree_nts <- [tree] +
  [TREE_NODE\tree_node_nts,
   TREE_DTRS\list(20)].      %% daughter trees

%% terminal tree
tree_term <- [tree] +
  [TREE_NODE\tree_node_term].

%% empty terminal tree
tree_empty <- [tree] +
  [TREE_NODE\tree_node_empty].
%% type used for pattern matching of trees
tree_any <- [tree_base] +
  [ANY_TREES\list(10)].      %% list of the matched trees
For 'tree_any', see "Pattern matching of trees".
%% node in a derivation
derivation <- [bot] +
  [DERIV_SIGN\bot(5)].               %% sign corresponding to the node

%% internal node of a derivation
derivation_internal <- [derivation] +
  [DERIV_SCHEMA\bot(0),              %% name of the applied schema
   DERIV_DTRS\list(10)].             %% daughter derivations

%% terminal node of a derivation
derivation_terminal <- [derivation] +
  [TERM_WORD\word(0),                %% word corresponding to the node
   TERM_TEMPLATE\lex_template(1),    %% template name assigned to the word
   LEXENTRY_SIGN\bot(10),            %% sign of a lexical entry
   LEXEME_SIGN\bot(11)               %% sign of a lexeme
  ].