From d700dd3e97f6453d50f6ac59a89eb82f7dab6200 Mon Sep 17 00:00:00 2001 From: pabloFuente Date: Mon, 10 Oct 2022 11:45:27 +0200 Subject: [PATCH] Speech-To-Text base support --- openvidu-browser/src/OpenVidu/OpenVidu.ts | 3 +- openvidu-browser/src/OpenVidu/Session.ts | 131 +++++++++++++----- .../Events/EventMap/SessionEventMap.ts | 8 ++ .../Events/SpeechToTextEvent.ts | 56 ++++++++ openvidu-browser/src/index.ts | 1 + .../client/internal/ProtocolElements.java | 14 +- .../server/core/SessionEventsHandler.java | 7 +- .../openvidu/server/core/SessionManager.java | 7 + .../kurento/core/KurentoSessionManager.java | 9 ++ .../io/openvidu/server/rpc/RpcHandler.java | 21 +++ 10 files changed, 217 insertions(+), 40 deletions(-) create mode 100644 openvidu-browser/src/OpenViduInternal/Events/SpeechToTextEvent.ts diff --git a/openvidu-browser/src/OpenVidu/OpenVidu.ts b/openvidu-browser/src/OpenVidu/OpenVidu.ts index 53124eee..3d2e34f5 100644 --- a/openvidu-browser/src/OpenVidu/OpenVidu.ts +++ b/openvidu-browser/src/OpenVidu/OpenVidu.ts @@ -868,7 +868,8 @@ export class OpenVidu { iceCandidate: this.session.recvIceCandidate.bind(this.session), mediaError: this.session.onMediaError.bind(this.session), masterNodeCrashedNotification: this.onMasterNodeCrashedNotification.bind(this), - forciblyReconnectSubscriber: this.session.onForciblyReconnectSubscriber.bind(this.session) + forciblyReconnectSubscriber: this.session.onForciblyReconnectSubscriber.bind(this.session), + speechToTextMessage: this.session.onSpeechToTextMessage.bind(this.session) } }; this.jsonRpcClient = new RpcBuilder.clients.JsonRpcClient(config); diff --git a/openvidu-browser/src/OpenVidu/Session.ts b/openvidu-browser/src/OpenVidu/Session.ts index 39ac6dbc..923ae261 100644 --- a/openvidu-browser/src/OpenVidu/Session.ts +++ b/openvidu-browser/src/OpenVidu/Session.ts @@ -35,6 +35,7 @@ import { FilterEvent } from '../OpenViduInternal/Events/FilterEvent'; import { RecordingEvent } from 
'../OpenViduInternal/Events/RecordingEvent'; import { SessionDisconnectedEvent } from '../OpenViduInternal/Events/SessionDisconnectedEvent'; import { SignalEvent } from '../OpenViduInternal/Events/SignalEvent'; +import { SpeechToTextEvent } from '../OpenViduInternal/Events/SpeechToTextEvent'; import { StreamEvent } from '../OpenViduInternal/Events/StreamEvent'; import { StreamPropertyChangedEvent } from '../OpenViduInternal/Events/StreamPropertyChangedEvent'; import { ConnectionPropertyChangedEvent } from '../OpenViduInternal/Events/ConnectionPropertyChangedEvent'; @@ -180,12 +181,12 @@ export class Session extends EventDispatcher { new OpenViduError( OpenViduErrorName.BROWSER_NOT_SUPPORTED, 'Browser ' + - platform.getName() + - ' (version ' + - platform.getVersion() + - ') for ' + - platform.getFamily() + - ' is not supported in OpenVidu' + platform.getName() + + ' (version ' + + platform.getVersion() + + ') for ' + + platform.getFamily() + + ' is not supported in OpenVidu' ) ); } @@ -468,7 +469,7 @@ export class Session extends EventDispatcher { return reject( new Error( 'The associated Connection object of this Publisher is not your local Connection. ' + - "Only moderators can force unpublish on remote Streams via 'forceUnpublish' method" + "Only moderators can force unpublish on remote Streams via 'forceUnpublish' method" ) ); } else { @@ -597,7 +598,6 @@ export class Session extends EventDispatcher { * @returns A Promise (to which you can optionally subscribe to) that is resolved if the message successfully reached openvidu-server and rejected with an Error object if not. 
_This doesn't * mean that openvidu-server could resend the message to all the listed receivers._ */ - /* tslint:disable:no-string-literal */ signal(signal: SignalOptions): Promise { return new Promise((resolve, reject) => { if (!this.sessionConnected()) { @@ -643,7 +643,55 @@ export class Session extends EventDispatcher { ); }); } - /* tslint:enable:no-string-literal */ + + /** + * Subscribe to the Speech-To-Text events for this [[Stream]]. The Session object will emit [[SpeechToTextEvent]] for the Stream + * when speech is detected in its audio track. + * + * @returns A Promise (to which you can optionally subscribe to) that is resolved if the speech-to-text subscription + * was successful and rejected with an Error object if not. + */ + subscribeToSpeechToText(stream: Stream): Promise { + return new Promise((resolve, reject) => { + this.openvidu.sendRequest( + 'subscribeToSpeechToText', + { + connectionIds: [stream.connection.connectionId] + }, + (error, response) => { + if (!!error) { + return reject(error); + } else { + return resolve(); + } + } + ); + }); + } + + /** + * Unsubscribe from the Speech-To-Text events for this [[Stream]]. + * + * @returns A Promise (to which you can optionally subscribe to) that is resolved if the speech-to-text unsubscription + * was successful and rejected with an Error object if not. + */ + unsubscribeFromSpeechToText(stream: Stream): Promise { + return new Promise((resolve, reject) => { + this.openvidu.sendRequest( + 'unsubscribeFromSpeechToText', + { + connectionIds: [stream.connection.connectionId] + }, + (error, response) => { + if (!!error) { + return reject(error); + } else { + return resolve(); + } + } + ); + }); + } /** * See [[EventDispatcher.on]] @@ -893,11 +941,11 @@ export class Session extends EventDispatcher { this.getConnection( event.from, "Connection '" + - event.from + - "' unknown when 'onNewMessage'. Existing remote connections: " + - JSON.stringify(this.remoteConnections.keys()) + - '. 
Existing local connection: ' + - this.connection.connectionId + event.from + + "' unknown when 'onNewMessage'. Existing remote connections: " + + JSON.stringify(this.remoteConnections.keys()) + + '. Existing local connection: ' + + this.connection.connectionId ) .then((connection) => { @@ -968,10 +1016,10 @@ export class Session extends EventDispatcher { } else { logger.error( "No stream with streamId '" + - event.streamId + - "' found for connection '" + - event.connectionId + - "' on 'streamPropertyChanged' event" + event.streamId + + "' found for connection '" + + event.connectionId + + "' on 'streamPropertyChanged' event" ); } }; @@ -1057,11 +1105,11 @@ export class Session extends EventDispatcher { this.getConnection( event.senderConnectionId, 'Connection not found for connectionId ' + - event.senderConnectionId + - ' owning endpoint ' + - event.endpointName + - '. Ice candidate will be ignored: ' + - iceCandidate + event.senderConnectionId + + ' owning endpoint ' + + event.endpointName + + '. Ice candidate will be ignored: ' + + iceCandidate ) .then((connection) => { const stream: Stream = connection.stream!; @@ -1156,8 +1204,7 @@ export class Session extends EventDispatcher { const stream: Stream = connection.stream!; if (!stream || !stream.filter) { return logger.error( - `Filter event of type "${event.eventType}" dispatched for stream ${stream.streamId} but there is no ${ - !stream ? 'stream' : 'filter' + `Filter event of type "${event.eventType}" dispatched for stream ${stream.streamId} but there is no ${!stream ? 
'stream' : 'filter' } defined` ); } @@ -1267,6 +1314,18 @@ export class Session extends EventDispatcher { } } + /** + * @hidden + */ + async onSpeechToTextMessage(event: { streamId: string; connectionId: string; sessionId: string, timestamp: number, raw: string }): Promise { + const connection = await this.getConnection(event.connectionId, 'No connection found for connectionId ' + event.connectionId); + const ev = new SpeechToTextEvent(this, connection, event.timestamp, event.raw); + this.ee.emitEvent('speechToText', [ev]); + if (ev.raw.includes('text')) { + console.log(ev); + } + } + /** * @hidden */ @@ -1402,12 +1461,12 @@ export class Session extends EventDispatcher { } else { logger.error( 'Browser ' + - platform.getName() + - ' (version ' + - platform.getVersion() + - ') for ' + - platform.getFamily() + - ' is not supported in OpenVidu for Network Quality' + platform.getName() + + ' (version ' + + platform.getVersion() + + ') for ' + + platform.getFamily() + + ' is not supported in OpenVidu for Network Quality' ); } } @@ -1678,15 +1737,15 @@ export class Session extends EventDispatcher { if (semverMajor(opts.version) !== semverMajor(this.openvidu.libraryVersion) || !(minorDifference == 0 || minorDifference == 1)) { logger.error( `openvidu-browser (${this.openvidu.libraryVersion}) and openvidu-server (${opts.version}) versions are incompatible. ` + - 'Errors are likely to occur. openvidu-browser SDK is only compatible with the same version or the immediately following minor version of an OpenVidu deployment' + 'Errors are likely to occur. openvidu-browser SDK is only compatible with the same version or the immediately following minor version of an OpenVidu deployment' ); } else if (minorDifference == 1) { logger.warn( `openvidu-browser version ${this.openvidu.libraryVersion} does not match openvidu-server version ${opts.version}. 
` + - `These versions are still compatible with each other, but openvidu-browser version must be updated as soon as possible to ${semverMajor( - opts.version - )}.${semverMinor(opts.version)}.x. ` + - `This client using openvidu-browser ${this.openvidu.libraryVersion} will become incompatible with the next release of openvidu-server` + `These versions are still compatible with each other, but openvidu-browser version must be updated as soon as possible to ${semverMajor( + opts.version + )}.${semverMinor(opts.version)}.x. ` + + `This client using openvidu-browser ${this.openvidu.libraryVersion} will become incompatible with the next release of openvidu-server` ); } diff --git a/openvidu-browser/src/OpenViduInternal/Events/EventMap/SessionEventMap.ts b/openvidu-browser/src/OpenViduInternal/Events/EventMap/SessionEventMap.ts index 1656379a..f9f1f513 100644 --- a/openvidu-browser/src/OpenViduInternal/Events/EventMap/SessionEventMap.ts +++ b/openvidu-browser/src/OpenViduInternal/Events/EventMap/SessionEventMap.ts @@ -24,6 +24,7 @@ import { PublisherSpeakingEvent } from '../PublisherSpeakingEvent'; import { RecordingEvent } from '../RecordingEvent'; import { SessionDisconnectedEvent } from '../SessionDisconnectedEvent'; import { SignalEvent } from '../SignalEvent'; +import { SpeechToTextEvent } from '../SpeechToTextEvent'; import { StreamEvent } from '../StreamEvent'; import { StreamPropertyChangedEvent } from '../StreamPropertyChangedEvent'; @@ -158,6 +159,13 @@ export interface SessionEventMap extends EventMap { */ networkQualityLevelChanged: NetworkQualityLevelChangedEvent; + /** + * **This feature is part of OpenVidu Pro tier** PRO + * + * Event dispatched when a speech-to-text message has been received for certain Stream. See [Speech To Text](/en/stable/advanced-features/speech-to-text/). + */ + speechToTextMessage: SpeechToTextEvent; + /** * Event dispatched when the local user has lost its connection to the session, and starts the automatic reconnection process. 
* diff --git a/openvidu-browser/src/OpenViduInternal/Events/SpeechToTextEvent.ts b/openvidu-browser/src/OpenViduInternal/Events/SpeechToTextEvent.ts new file mode 100644 index 00000000..9d6d0d74 --- /dev/null +++ b/openvidu-browser/src/OpenViduInternal/Events/SpeechToTextEvent.ts @@ -0,0 +1,56 @@ +/* + * (C) Copyright 2017-2022 OpenVidu (https://openvidu.io) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +import { Event } from './Event'; +import { Connection } from '../../OpenVidu/Connection'; +import { Session } from '../../OpenVidu/Session'; + +/** + * Triggered by [[SessionEventMap.speechToTextMessage]] + */ +export class SpeechToTextEvent extends Event { + /** + * The [[Connection]] the speech-to-text message belongs to + */ + connection: Connection; + + /** + * Timestamp of the speech-to-text message + */ + timestamp: number; + + /** + * The original event from the speech to text engine.
This can vary depending on the engine + */ + raw: string; + + /** + * @hidden + */ + constructor(target: Session, connection: Connection, timestamp: number, raw: string) { + super(false, target, 'speechToText'); + this.connection = connection; + this.timestamp = timestamp; + this.raw = raw; + } + + /** + * @hidden + */ + // tslint:disable-next-line:no-empty + callDefaultBehavior() { } +} diff --git a/openvidu-browser/src/index.ts b/openvidu-browser/src/index.ts index fb94bb8d..a9f538fc 100644 --- a/openvidu-browser/src/index.ts +++ b/openvidu-browser/src/index.ts @@ -28,6 +28,7 @@ export { StreamPropertyChangedEvent } from './OpenViduInternal/Events/StreamProp export { ConnectionPropertyChangedEvent } from './OpenViduInternal/Events/ConnectionPropertyChangedEvent'; export { FilterEvent } from './OpenViduInternal/Events/FilterEvent'; export { NetworkQualityLevelChangedEvent } from './OpenViduInternal/Events/NetworkQualityLevelChangedEvent'; +export { SpeechToTextEvent } from './OpenViduInternal/Events/SpeechToTextEvent'; export { ExceptionEvent, ExceptionEventName } from './OpenViduInternal/Events/ExceptionEvent'; export { Capabilities } from './OpenViduInternal/Interfaces/Public/Capabilities'; diff --git a/openvidu-client/src/main/java/io/openvidu/client/internal/ProtocolElements.java b/openvidu-client/src/main/java/io/openvidu/client/internal/ProtocolElements.java index 2e0d3e88..fb1f633e 100644 --- a/openvidu-client/src/main/java/io/openvidu/client/internal/ProtocolElements.java +++ b/openvidu-client/src/main/java/io/openvidu/client/internal/ProtocolElements.java @@ -147,6 +147,12 @@ public class ProtocolElements { public static final String FORCIBLYRECONNECTSUBSCRIBER_STREAMID_PARAM = "streamId"; public static final String FORCIBLYRECONNECTSUBSCRIBER_SDPOFFER_PARAM = "sdpOffer"; + public static final String SUBSCRIBETOSPEECHTOTEXT_METHOD = "subscribeToSpeechToText"; + public static final String SUBSCRIBETOSPEECHTOTEXT_CONNECTIONIDS_PARAM = "connectionIds"; + + 
public static final String UNSUBSCRIBEFROMSPEECHTOTEXT_METHOD = "unsubscribeFromSpeechToText"; + public static final String UNSUBSCRIBEFROMSPEECHTOTEXT_CONNECTIONIDS_PARAM = "connectionIds"; + // ---------------------------- SERVER RESPONSES & EVENTS ----------------- public static final String PARTICIPANTJOINED_METHOD = "participantJoined"; @@ -221,7 +227,13 @@ public class ProtocolElements { public static final String RECORDINGSTOPPED_METHOD = "recordingStopped"; public static final String RECORDINGSTOPPED_ID_PARAM = "id"; - public static final String CUSTOM_NOTIFICATION = "custonNotification"; + public static final String SPEECHTOTEXTMESSAGE_METHOD = "speechToTextMessage"; + public static final String SPEECHTOTEXTMESSAGE_TIMESTAMP_PARAM = "timestamp"; + public static final String SPEECHTOTEXTMESSAGE_SESSIONID_PARAM = "sessionId"; + public static final String SPEECHTOTEXTMESSAGE_CONNECTIONID_PARAM = "connectionId"; + public static final String SPEECHTOTEXTMESSAGE_RAW_PARAM = "raw"; + + public static final String CUSTOM_NOTIFICATION = "customNotification"; public static final String RECORDER_PARTICIPANT_PUBLICID = "RECORDER"; } diff --git a/openvidu-server/src/main/java/io/openvidu/server/core/SessionEventsHandler.java b/openvidu-server/src/main/java/io/openvidu/server/core/SessionEventsHandler.java index 51f9642b..bbcde615 100644 --- a/openvidu-server/src/main/java/io/openvidu/server/core/SessionEventsHandler.java +++ b/openvidu-server/src/main/java/io/openvidu/server/core/SessionEventsHandler.java @@ -205,8 +205,7 @@ public class SessionEventsHandler { IceServerProperties defaultIceServer = new IceServerProperties.Builder() .url("turn:" + coturnIp + ":" + openviduConfig.getCoturnPort()) .username(participant.getToken().getTurnCredentials().getUsername()) - .credential(participant.getToken().getTurnCredentials().getCredential()) - .build(); + .credential(participant.getToken().getTurnCredentials().getCredential()).build(); 
defaultCustomIceServers.add(defaultIceServer.toJson()); result.add(ProtocolElements.PARTICIPANTJOINED_CUSTOM_ICE_SERVERS, defaultCustomIceServers); } @@ -693,6 +692,10 @@ public class SessionEventsHandler { public void onMediaNodeRecovered(Kms kms, String environmentId, long timeOfConnection) { } + public void onSpeechToTextMessage(String sessionId, String connectionId, long timestamp, String text, + Set subscribedParticipants) { + } + public void storeRecordingToSendClientEvent(Recording recording) { recordingsToSendClientEvents.put(recording.getSessionId(), recording); } diff --git a/openvidu-server/src/main/java/io/openvidu/server/core/SessionManager.java b/openvidu-server/src/main/java/io/openvidu/server/core/SessionManager.java index df495b6f..376cd1b4 100644 --- a/openvidu-server/src/main/java/io/openvidu/server/core/SessionManager.java +++ b/openvidu-server/src/main/java/io/openvidu/server/core/SessionManager.java @@ -39,6 +39,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; +import com.google.gson.JsonArray; import com.google.gson.JsonElement; import com.google.gson.JsonObject; import com.google.gson.JsonParser; @@ -187,6 +188,12 @@ public abstract class SessionManager { public abstract void onVideoData(Participant participant, Integer transactionId, Integer height, Integer width, Boolean videoActive, Boolean audioActive); + public abstract void onSubscribeToSpeechToText(Participant participant, Integer transactionId, + JsonArray connectionIds); + + public abstract void onUnsubscribeFromSpeechToText(Participant participant, Integer transactionId, + JsonArray connectionIds); + public void onEcho(String participantPrivateId, Integer requestId) { sessionEventsHandler.onEcho(participantPrivateId, requestId); } diff --git a/openvidu-server/src/main/java/io/openvidu/server/kurento/core/KurentoSessionManager.java 
b/openvidu-server/src/main/java/io/openvidu/server/kurento/core/KurentoSessionManager.java index 44a6de6d..e2566f30 100644 --- a/openvidu-server/src/main/java/io/openvidu/server/kurento/core/KurentoSessionManager.java +++ b/openvidu-server/src/main/java/io/openvidu/server/kurento/core/KurentoSessionManager.java @@ -42,6 +42,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; +import com.google.gson.JsonArray; import com.google.gson.JsonElement; import com.google.gson.JsonObject; @@ -1196,6 +1197,14 @@ public class KurentoSessionManager extends SessionManager { forciblyReconnect); } + @Override + public void onSubscribeToSpeechToText(Participant participant, Integer transactionId, JsonArray connectionIds) { + } + + @Override + public void onUnsubscribeFromSpeechToText(Participant participant, Integer transactionId, JsonArray connectionIds) { + } + private String mungeSdpOffer(Session kSession, Participant participant, String sdpOffer, boolean isPublisher) { boolean isTranscodingAllowed = kSession.getSessionProperties().isTranscodingAllowed(); VideoCodec forcedVideoCodec = kSession.getSessionProperties().forcedVideoCodecResolved(); diff --git a/openvidu-server/src/main/java/io/openvidu/server/rpc/RpcHandler.java b/openvidu-server/src/main/java/io/openvidu/server/rpc/RpcHandler.java index cd9dc8f3..ffaf6242 100644 --- a/openvidu-server/src/main/java/io/openvidu/server/rpc/RpcHandler.java +++ b/openvidu-server/src/main/java/io/openvidu/server/rpc/RpcHandler.java @@ -38,6 +38,7 @@ import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.HttpHeaders; +import com.google.gson.JsonArray; import com.google.gson.JsonElement; import com.google.gson.JsonObject; import com.google.gson.JsonParser; @@ -180,6 +181,12 @@ public class RpcHandler extends DefaultJsonRpcHandler { case ProtocolElements.VIDEODATA_METHOD: 
updateVideoData(rpcConnection, request); break; + case ProtocolElements.SUBSCRIBETOSPEECHTOTEXT_METHOD: + subscribeToSpeechToText(rpcConnection, request); + break; + case ProtocolElements.UNSUBSCRIBEFROMSPEECHTOTEXT_METHOD: + unsubscribeFromSpeechToText(rpcConnection, request); + break; case ProtocolElements.ECHO_METHOD: echo(rpcConnection, request); break; @@ -700,6 +707,20 @@ public class RpcHandler extends DefaultJsonRpcHandler { } } + private void subscribeToSpeechToText(RpcConnection rpcConnection, Request request) { + Participant participant = sanityCheckOfSession(rpcConnection, "subscribeToSpeechToText"); + JsonArray connectionIds = (JsonArray) RpcHandler.getParam(request, + ProtocolElements.SUBSCRIBETOSPEECHTOTEXT_CONNECTIONIDS_PARAM); + sessionManager.onSubscribeToSpeechToText(participant, request.getId(), connectionIds); + } + + private void unsubscribeFromSpeechToText(RpcConnection rpcConnection, Request request) { + Participant participant = sanityCheckOfSession(rpcConnection, "unsubscribeFromSpeechToText"); + JsonArray connectionIds = (JsonArray) RpcHandler.getParam(request, + ProtocolElements.UNSUBSCRIBEFROMSPEECHTOTEXT_CONNECTIONIDS_PARAM); + sessionManager.onUnsubscribeFromSpeechToText(participant, request.getId(), connectionIds); + } + private void echo(RpcConnection rpcConnection, Request request) { sessionManager.onEcho(rpcConnection.getParticipantPrivateId(), request.getId()); }