// Declared explicitly because this file relies on proto2-only constructs
// (`required` fields and explicit `[default = ...]` values).
syntax = "proto2";

package rst.tts;

// Name of the Java wrapper class generated for this file.
option java_outer_classname = "TextWithProsodyType";

/**
 * Represents a text and its prosody information. Text can be
 * anything from a few syllables to multiple sentences. The prosody is
 * constant for the whole text.
 *
 * @author Soeren Klett <sklett@techfak.uni-bielefeld.de>
 * @author Birte Carlmeyer <bcarlmey@techfak.uni-bielefeld.de>
 */
message TextWithProsody {

    /**
     * The text to which the prosody information is attached.
     *
     * Must not be empty. Note that the field is declared optional, so
     * this rule is not enforced by the schema and has to be checked by
     * the producer and/or consumer.
     */
    optional string text = 1;

    /**
     * Describes a constant prosody for an amount of text. It is assumed
     * that there are application-specific default values for all
     * aspects of the prosody (pitch, range, etc.). The prosody can be
     * expressed either in relation to the baseline values or with
     * absolute values.
     *
     * All aspects of the prosody are optional. In case an aspect is not
     * defined, an executing TTS engine can decide on these aspects.
     */
    message Prosody {

        /**
         * Specifies the value for a prosody aspect using multiple
         * possible formulations, which are represented by the different
         * attributes of the message.
         *
         * Exactly one of the attributes needs to be set. The three
         * attributes are independent optional fields, so this rule is
         * not enforced by the schema; consumers should check field
         * presence to find out which formulation was chosen.
         */
        message Value {

            /**
             * Absolute value in the target unit.
             */
            optional float absolute = 1;

            /**
             * Offset to an application-specific default value given in
             * the target unit.
             */
            optional float relative = 2;

            /**
             * Percentage of the application-specific default value.
             *
             * 100% equals 1.0. Reading this field while it is unset
             * yields the declared default of 1, i.e. 100%.
             */
            // @constraint(value > 0)
            optional float percentage = 3 [default = 1];

        }

        /**
         * The baseline pitch for the contained words.
         *
         * Absolute and relative values are expressed in Hz.
         */
        optional Value pitch = 1;

        /**
         * The pitch range (variability) of the contained words.
         *
         * Absolute and relative values are expressed in Hz.
         */
        optional Value range = 2;

        /**
         * The desired change of volume of the contained words.
         *
         * Absolute and relative values are expressed in dB.
         */
        optional Value volume = 3;

        /**
         * A value in milliseconds for the desired time to take to read
         * the contained words.
         *
         * TODO we usually use SI units. Can we change this to float in
         * seconds?
         */
        // @unit(millisecond)
        // @constraint(value >= 0)
        optional uint32 duration = 4;

        /**
         * Relative speech rate given as a percentage of the
         * application-specific base rate.
         *
         * Reading this field while it is unset yields the declared
         * default of 1 (presumably 1.0 means 100% of the base rate, as
         * for Value.percentage -- TODO confirm).
         */
        // @constraint(value >= 0)
        optional float rate = 5 [default = 1];

    }

    /**
     * Prosody to be applied to everything contained in @ref .text.
     *
     * TODO since all attributes inside the prosody are optional, we
     * can always require the instance, which conforms with the message
     * name. Is this ok?
     *
     * NOTE(review): the field is kept as required for compatibility
     * with existing readers and writers; relaxing it to optional would
     * change initialization checks and should be a deliberate,
     * coordinated schema change.
     */
    required Prosody prosody = 2;

}