Speech-To-Text base support

pull/748/head
pabloFuente 2022-10-10 11:45:27 +02:00
parent 9fe3a6856c
commit d700dd3e97
10 changed files with 217 additions and 40 deletions

View File

@ -868,7 +868,8 @@ export class OpenVidu {
iceCandidate: this.session.recvIceCandidate.bind(this.session), iceCandidate: this.session.recvIceCandidate.bind(this.session),
mediaError: this.session.onMediaError.bind(this.session), mediaError: this.session.onMediaError.bind(this.session),
masterNodeCrashedNotification: this.onMasterNodeCrashedNotification.bind(this), masterNodeCrashedNotification: this.onMasterNodeCrashedNotification.bind(this),
forciblyReconnectSubscriber: this.session.onForciblyReconnectSubscriber.bind(this.session) forciblyReconnectSubscriber: this.session.onForciblyReconnectSubscriber.bind(this.session),
speechToTextMessage: this.session.onSpeechToTextMessage.bind(this.session)
} }
}; };
this.jsonRpcClient = new RpcBuilder.clients.JsonRpcClient(config); this.jsonRpcClient = new RpcBuilder.clients.JsonRpcClient(config);

View File

@ -35,6 +35,7 @@ import { FilterEvent } from '../OpenViduInternal/Events/FilterEvent';
import { RecordingEvent } from '../OpenViduInternal/Events/RecordingEvent'; import { RecordingEvent } from '../OpenViduInternal/Events/RecordingEvent';
import { SessionDisconnectedEvent } from '../OpenViduInternal/Events/SessionDisconnectedEvent'; import { SessionDisconnectedEvent } from '../OpenViduInternal/Events/SessionDisconnectedEvent';
import { SignalEvent } from '../OpenViduInternal/Events/SignalEvent'; import { SignalEvent } from '../OpenViduInternal/Events/SignalEvent';
import { SpeechToTextEvent } from '../OpenViduInternal/Events/SpeechToTextEvent';
import { StreamEvent } from '../OpenViduInternal/Events/StreamEvent'; import { StreamEvent } from '../OpenViduInternal/Events/StreamEvent';
import { StreamPropertyChangedEvent } from '../OpenViduInternal/Events/StreamPropertyChangedEvent'; import { StreamPropertyChangedEvent } from '../OpenViduInternal/Events/StreamPropertyChangedEvent';
import { ConnectionPropertyChangedEvent } from '../OpenViduInternal/Events/ConnectionPropertyChangedEvent'; import { ConnectionPropertyChangedEvent } from '../OpenViduInternal/Events/ConnectionPropertyChangedEvent';
@ -597,7 +598,6 @@ export class Session extends EventDispatcher {
* @returns A Promise (to which you can optionally subscribe to) that is resolved if the message successfully reached openvidu-server and rejected with an Error object if not. _This doesn't * @returns A Promise (to which you can optionally subscribe to) that is resolved if the message successfully reached openvidu-server and rejected with an Error object if not. _This doesn't
* mean that openvidu-server could resend the message to all the listed receivers._ * mean that openvidu-server could resend the message to all the listed receivers._
*/ */
/* tslint:disable:no-string-literal */
signal(signal: SignalOptions): Promise<void> { signal(signal: SignalOptions): Promise<void> {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
if (!this.sessionConnected()) { if (!this.sessionConnected()) {
@ -643,7 +643,55 @@ export class Session extends EventDispatcher {
); );
}); });
} }
/* tslint:enable:no-string-literal */
/**
 * Requests Speech-To-Text events for the given [[Stream]]. Once subscribed, the Session object emits a
 * [[SpeechToTextEvent]] for the Stream when speech is detected in its audio track.
 *
 * @param stream The Stream to subscribe to. The `connectionId` of its Connection is sent in the request
 *
 * @returns A Promise (to which you can optionally subscribe to) that is resolved if the speech-to-text subscription
 * was successful and rejected with an Error object if not.
 */
subscribeToSpeechToText(stream: Stream): Promise<void> {
    return new Promise<void>((fulfill, fail) => {
        this.openvidu.sendRequest(
            'subscribeToSpeechToText',
            { connectionIds: [stream.connection.connectionId] },
            (err, _response) => {
                // Any truthy error reported by the request rejects the Promise
                if (err) {
                    fail(err);
                    return;
                }
                fulfill();
            }
        );
    });
}
/**
 * Cancels the Speech-To-Text subscription for the given [[Stream]].
 *
 * @param stream The Stream to unsubscribe from. The `connectionId` of its Connection is sent in the request
 *
 * @returns A Promise (to which you can optionally subscribe to) that is resolved if the speech-to-text unsubscription
 * was successful and rejected with an Error object if not.
 */
unsubscribeFromSpeechToText(stream: Stream): Promise<void> {
    return new Promise<void>((fulfill, fail) => {
        this.openvidu.sendRequest(
            'unsubscribeFromSpeechToText',
            { connectionIds: [stream.connection.connectionId] },
            (err, _response) => {
                // Any truthy error reported by the request rejects the Promise
                if (err) {
                    fail(err);
                    return;
                }
                fulfill();
            }
        );
    });
}
/** /**
* See [[EventDispatcher.on]] * See [[EventDispatcher.on]]
@ -1156,8 +1204,7 @@ export class Session extends EventDispatcher {
const stream: Stream = connection.stream!; const stream: Stream = connection.stream!;
if (!stream || !stream.filter) { if (!stream || !stream.filter) {
return logger.error( return logger.error(
`Filter event of type "${event.eventType}" dispatched for stream ${stream.streamId} but there is no ${ `Filter event of type "${event.eventType}" dispatched for stream ${stream.streamId} but there is no ${!stream ? 'stream' : 'filter'
!stream ? 'stream' : 'filter'
} defined` } defined`
); );
} }
@ -1267,6 +1314,18 @@ export class Session extends EventDispatcher {
} }
} }
/**
 * Handler bound to the `speechToTextMessage` server notification. Resolves the Connection identified by
 * `event.connectionId`, wraps the message in a [[SpeechToTextEvent]] and emits it to the Session listeners.
 *
 * If the Connection lookup rejects, the returned Promise rejects as well and no event is emitted.
 *
 * @hidden
 */
async onSpeechToTextMessage(event: {
    streamId: string;
    connectionId: string;
    sessionId: string;
    timestamp: number;
    raw: string;
}): Promise<void> {
    const connection = await this.getConnection(event.connectionId, 'No connection found for connectionId ' + event.connectionId);
    const ev = new SpeechToTextEvent(this, connection, event.timestamp, event.raw);
    // Emit under the name declared in SessionEventMap ('speechToTextMessage'); listeners registered
    // with that key would never be reached by an event emitted as 'speechToText'
    this.ee.emitEvent('speechToTextMessage', [ev]);
}
/** /**
* @hidden * @hidden
*/ */

View File

@ -24,6 +24,7 @@ import { PublisherSpeakingEvent } from '../PublisherSpeakingEvent';
import { RecordingEvent } from '../RecordingEvent'; import { RecordingEvent } from '../RecordingEvent';
import { SessionDisconnectedEvent } from '../SessionDisconnectedEvent'; import { SessionDisconnectedEvent } from '../SessionDisconnectedEvent';
import { SignalEvent } from '../SignalEvent'; import { SignalEvent } from '../SignalEvent';
import { SpeechToTextEvent } from '../SpeechToTextEvent';
import { StreamEvent } from '../StreamEvent'; import { StreamEvent } from '../StreamEvent';
import { StreamPropertyChangedEvent } from '../StreamPropertyChangedEvent'; import { StreamPropertyChangedEvent } from '../StreamPropertyChangedEvent';
@ -158,6 +159,13 @@ export interface SessionEventMap extends EventMap {
*/ */
networkQualityLevelChanged: NetworkQualityLevelChangedEvent; networkQualityLevelChanged: NetworkQualityLevelChangedEvent;
/**
 * **This feature is part of OpenVidu Pro tier** <a href="https://docs.openvidu.io/en/stable/openvidu-pro/" style="display: inline-block; background-color: rgb(0, 136, 170); color: white; font-weight: bold; padding: 0px 5px; margin-right: 5px; border-radius: 3px; font-size: 13px; line-height:21px; font-family: Montserrat, sans-serif">PRO</a>
 *
 * Event dispatched when a speech-to-text message has been received for a certain Stream. Speech-to-text events for a
 * Stream are requested with [[Session.subscribeToSpeechToText]]. See [Speech To Text](/en/stable/advanced-features/speech-to-text/).
 */
speechToTextMessage: SpeechToTextEvent;
/** /**
* Event dispatched when the local user has lost its connection to the session, and starts the automatic reconnection process. * Event dispatched when the local user has lost its connection to the session, and starts the automatic reconnection process.
* *

View File

@ -0,0 +1,56 @@
/*
* (C) Copyright 2017-2022 OpenVidu (https://openvidu.io)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
import { Event } from './Event';
import { Connection } from '../../OpenVidu/Connection';
import { Session } from '../../OpenVidu/Session';
/**
 * Triggered by [[SessionEventMap.speechToTextMessage]]
 */
export class SpeechToTextEvent extends Event {
    /**
     * The [[Connection]] this speech-to-text event is associated with
     */
    connection: Connection;

    /**
     * Timestamp carried by the speech-to-text message
     */
    timestamp: number;

    /**
     * The original event from the speech to text engine. This can vary depending on the engine
     */
    raw: string;

    /**
     * @hidden
     */
    constructor(target: Session, connection: Connection, timestamp: number, raw: string) {
        // The event type matches the key declared in SessionEventMap
        super(false, target, 'speechToTextMessage');
        this.connection = connection;
        this.timestamp = timestamp;
        this.raw = raw;
    }

    /**
     * @hidden
     */
    // tslint:disable-next-line:no-empty
    callDefaultBehavior() { }
}

View File

@ -28,6 +28,7 @@ export { StreamPropertyChangedEvent } from './OpenViduInternal/Events/StreamProp
export { ConnectionPropertyChangedEvent } from './OpenViduInternal/Events/ConnectionPropertyChangedEvent'; export { ConnectionPropertyChangedEvent } from './OpenViduInternal/Events/ConnectionPropertyChangedEvent';
export { FilterEvent } from './OpenViduInternal/Events/FilterEvent'; export { FilterEvent } from './OpenViduInternal/Events/FilterEvent';
export { NetworkQualityLevelChangedEvent } from './OpenViduInternal/Events/NetworkQualityLevelChangedEvent'; export { NetworkQualityLevelChangedEvent } from './OpenViduInternal/Events/NetworkQualityLevelChangedEvent';
export { SpeechToTextEvent } from './OpenViduInternal/Events/SpeechToTextEvent';
export { ExceptionEvent, ExceptionEventName } from './OpenViduInternal/Events/ExceptionEvent'; export { ExceptionEvent, ExceptionEventName } from './OpenViduInternal/Events/ExceptionEvent';
export { Capabilities } from './OpenViduInternal/Interfaces/Public/Capabilities'; export { Capabilities } from './OpenViduInternal/Interfaces/Public/Capabilities';

View File

@ -147,6 +147,12 @@ public class ProtocolElements {
public static final String FORCIBLYRECONNECTSUBSCRIBER_STREAMID_PARAM = "streamId"; public static final String FORCIBLYRECONNECTSUBSCRIBER_STREAMID_PARAM = "streamId";
public static final String FORCIBLYRECONNECTSUBSCRIBER_SDPOFFER_PARAM = "sdpOffer"; public static final String FORCIBLYRECONNECTSUBSCRIBER_SDPOFFER_PARAM = "sdpOffer";
// Speech-To-Text subscription requests: RPC method names and the name of their
// "connectionIds" parameter
public static final String SUBSCRIBETOSPEECHTOTEXT_METHOD = "subscribeToSpeechToText";
public static final String SUBSCRIBETOSPEECHTOTEXT_CONNECTIONIDS_PARAM = "connectionIds";
public static final String UNSUBSCRIBEFROMSPEECHTOTEXT_METHOD = "unsubscribeFromSpeechToText";
public static final String UNSUBSCRIBEFROMSPEECHTOTEXT_CONNECTIONIDS_PARAM = "connectionIds";
// ---------------------------- SERVER RESPONSES & EVENTS ----------------- // ---------------------------- SERVER RESPONSES & EVENTS -----------------
public static final String PARTICIPANTJOINED_METHOD = "participantJoined"; public static final String PARTICIPANTJOINED_METHOD = "participantJoined";
@ -221,7 +227,13 @@ public class ProtocolElements {
public static final String RECORDINGSTOPPED_METHOD = "recordingStopped"; public static final String RECORDINGSTOPPED_METHOD = "recordingStopped";
public static final String RECORDINGSTOPPED_ID_PARAM = "id"; public static final String RECORDINGSTOPPED_ID_PARAM = "id";
public static final String CUSTOM_NOTIFICATION = "custonNotification"; public static final String SPEECHTOTEXTMESSAGE_METHOD = "speechToTextMessage";
// Parameter names of the "speechToTextMessage" notification
public static final String SPEECHTOTEXTMESSAGE_TIMESTAMP_PARAM = "timestamp";
public static final String SPEECHTOTEXTMESSAGE_SESSIONID_PARAM = "sessionId";
public static final String SPEECHTOTEXTMESSAGE_CONNECTIONID_PARAM = "connectionId";
public static final String SPEECHTOTEXTMESSAGE_RAW_PARAM = "raw";
public static final String CUSTOM_NOTIFICATION = "customNotification";
public static final String RECORDER_PARTICIPANT_PUBLICID = "RECORDER"; public static final String RECORDER_PARTICIPANT_PUBLICID = "RECORDER";
} }

View File

@ -205,8 +205,7 @@ public class SessionEventsHandler {
IceServerProperties defaultIceServer = new IceServerProperties.Builder() IceServerProperties defaultIceServer = new IceServerProperties.Builder()
.url("turn:" + coturnIp + ":" + openviduConfig.getCoturnPort()) .url("turn:" + coturnIp + ":" + openviduConfig.getCoturnPort())
.username(participant.getToken().getTurnCredentials().getUsername()) .username(participant.getToken().getTurnCredentials().getUsername())
.credential(participant.getToken().getTurnCredentials().getCredential()) .credential(participant.getToken().getTurnCredentials().getCredential()).build();
.build();
defaultCustomIceServers.add(defaultIceServer.toJson()); defaultCustomIceServers.add(defaultIceServer.toJson());
result.add(ProtocolElements.PARTICIPANTJOINED_CUSTOM_ICE_SERVERS, defaultCustomIceServers); result.add(ProtocolElements.PARTICIPANTJOINED_CUSTOM_ICE_SERVERS, defaultCustomIceServers);
} }
@ -693,6 +692,10 @@ public class SessionEventsHandler {
public void onMediaNodeRecovered(Kms kms, String environmentId, long timeOfConnection) { public void onMediaNodeRecovered(Kms kms, String environmentId, long timeOfConnection) {
} }
/**
 * Callback for a Speech-To-Text message. This implementation is intentionally
 * empty (no-op).
 *
 * NOTE(review): presumably meant to be overridden by a subclass that actually
 * delivers the message to the subscribed participants - confirm.
 *
 * @param sessionId              identifier of the session the message belongs to
 * @param connectionId           identifier of the connection the message refers to
 * @param timestamp              timestamp of the message
 * @param text                   content of the message
 * @param subscribedParticipants participants subscribed to the message
 */
public void onSpeechToTextMessage(String sessionId, String connectionId, long timestamp, String text,
Set<Participant> subscribedParticipants) {
}
public void storeRecordingToSendClientEvent(Recording recording) { public void storeRecordingToSendClientEvent(Recording recording) {
recordingsToSendClientEvents.put(recording.getSessionId(), recording); recordingsToSendClientEvents.put(recording.getSessionId(), recording);
} }

View File

@ -39,6 +39,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement; import com.google.gson.JsonElement;
import com.google.gson.JsonObject; import com.google.gson.JsonObject;
import com.google.gson.JsonParser; import com.google.gson.JsonParser;
@ -187,6 +188,12 @@ public abstract class SessionManager {
public abstract void onVideoData(Participant participant, Integer transactionId, Integer height, Integer width, public abstract void onVideoData(Participant participant, Integer transactionId, Integer height, Integer width,
Boolean videoActive, Boolean audioActive); Boolean videoActive, Boolean audioActive);
/**
 * Handles a "subscribeToSpeechToText" request issued by a participant. Concrete
 * session managers define the behavior.
 *
 * @param participant   the participant that sent the request
 * @param transactionId id of the RPC request being handled
 * @param connectionIds JSON array received in the "connectionIds" request parameter
 */
public abstract void onSubscribeToSpeechToText(Participant participant, Integer transactionId,
JsonArray connectionIds);
/**
 * Handles an "unsubscribeFromSpeechToText" request issued by a participant.
 * Concrete session managers define the behavior.
 *
 * @param participant   the participant that sent the request
 * @param transactionId id of the RPC request being handled
 * @param connectionIds JSON array received in the "connectionIds" request parameter
 */
public abstract void onUnsubscribeFromSpeechToText(Participant participant, Integer transactionId,
JsonArray connectionIds);
public void onEcho(String participantPrivateId, Integer requestId) { public void onEcho(String participantPrivateId, Integer requestId) {
sessionEventsHandler.onEcho(participantPrivateId, requestId); sessionEventsHandler.onEcho(participantPrivateId, requestId);
} }

View File

@ -42,6 +42,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement; import com.google.gson.JsonElement;
import com.google.gson.JsonObject; import com.google.gson.JsonObject;
@ -1196,6 +1197,14 @@ public class KurentoSessionManager extends SessionManager {
forciblyReconnect); forciblyReconnect);
} }
/**
 * No-op: this session manager does not act on Speech-To-Text subscription
 * requests.
 *
 * NOTE(review): the empty body sends no reply for transactionId - confirm the
 * client request is not left waiting for a response.
 */
@Override
public void onSubscribeToSpeechToText(Participant participant, Integer transactionId, JsonArray connectionIds) {
}
/**
 * No-op: this session manager does not act on Speech-To-Text unsubscription
 * requests.
 *
 * NOTE(review): the empty body sends no reply for transactionId - confirm the
 * client request is not left waiting for a response.
 */
@Override
public void onUnsubscribeFromSpeechToText(Participant participant, Integer transactionId, JsonArray connectionIds) {
}
private String mungeSdpOffer(Session kSession, Participant participant, String sdpOffer, boolean isPublisher) { private String mungeSdpOffer(Session kSession, Participant participant, String sdpOffer, boolean isPublisher) {
boolean isTranscodingAllowed = kSession.getSessionProperties().isTranscodingAllowed(); boolean isTranscodingAllowed = kSession.getSessionProperties().isTranscodingAllowed();
VideoCodec forcedVideoCodec = kSession.getSessionProperties().forcedVideoCodecResolved(); VideoCodec forcedVideoCodec = kSession.getSessionProperties().forcedVideoCodecResolved();

View File

@ -38,6 +38,7 @@ import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.HttpHeaders; import org.springframework.http.HttpHeaders;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement; import com.google.gson.JsonElement;
import com.google.gson.JsonObject; import com.google.gson.JsonObject;
import com.google.gson.JsonParser; import com.google.gson.JsonParser;
@ -180,6 +181,12 @@ public class RpcHandler extends DefaultJsonRpcHandler<JsonObject> {
case ProtocolElements.VIDEODATA_METHOD: case ProtocolElements.VIDEODATA_METHOD:
updateVideoData(rpcConnection, request); updateVideoData(rpcConnection, request);
break; break;
case ProtocolElements.SUBSCRIBETOSPEECHTOTEXT_METHOD:
subscribeToSpeechToText(rpcConnection, request);
break;
case ProtocolElements.UNSUBSCRIBEFROMSPEECHTOTEXT_METHOD:
unsubscribeFromSpeechToText(rpcConnection, request);
break;
case ProtocolElements.ECHO_METHOD: case ProtocolElements.ECHO_METHOD:
echo(rpcConnection, request); echo(rpcConnection, request);
break; break;
@ -700,6 +707,20 @@ public class RpcHandler extends DefaultJsonRpcHandler<JsonObject> {
} }
} }
/**
 * Serves the "subscribeToSpeechToText" RPC request: obtains the requesting
 * participant through {@code sanityCheckOfSession}, reads the "connectionIds"
 * request parameter as a JSON array and delegates to the session manager.
 *
 * @param rpcConnection connection the request arrived on
 * @param request       the RPC request
 */
private void subscribeToSpeechToText(RpcConnection rpcConnection, Request<JsonObject> request) {
	final Participant requester = sanityCheckOfSession(rpcConnection, "subscribeToSpeechToText");
	final JsonArray targetConnectionIds = (JsonArray) RpcHandler.getParam(request,
			ProtocolElements.SUBSCRIBETOSPEECHTOTEXT_CONNECTIONIDS_PARAM);
	sessionManager.onSubscribeToSpeechToText(requester, request.getId(), targetConnectionIds);
}
/**
 * Serves the "unsubscribeFromSpeechToText" RPC request: obtains the requesting
 * participant through {@code sanityCheckOfSession}, reads the "connectionIds"
 * request parameter as a JSON array and delegates to the session manager.
 *
 * @param rpcConnection connection the request arrived on
 * @param request       the RPC request
 */
private void unsubscribeFromSpeechToText(RpcConnection rpcConnection, Request<JsonObject> request) {
	final Participant requester = sanityCheckOfSession(rpcConnection, "unsubscribeFromSpeechToText");
	final JsonArray targetConnectionIds = (JsonArray) RpcHandler.getParam(request,
			ProtocolElements.UNSUBSCRIBEFROMSPEECHTOTEXT_CONNECTIONIDS_PARAM);
	sessionManager.onUnsubscribeFromSpeechToText(requester, request.getId(), targetConnectionIds);
}
private void echo(RpcConnection rpcConnection, Request<JsonObject> request) { private void echo(RpcConnection rpcConnection, Request<JsonObject> request) {
sessionManager.onEcho(rpcConnection.getParticipantPrivateId(), request.getId()); sessionManager.onEcho(rpcConnection.getParticipantPrivateId(), request.getId());
} }