mirror of https://github.com/OpenVidu/openvidu.git
Azure and AWS STT tests
parent
2043c33761
commit
7bd2ad5868
|
@ -19,12 +19,9 @@ package io.openvidu.test.e2e;
|
|||
|
||||
import static org.openqa.selenium.OutputType.BASE64;
|
||||
|
||||
import java.awt.Point;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
@ -41,7 +38,6 @@ import java.util.function.Consumer;
|
|||
import org.openqa.selenium.JavascriptExecutor;
|
||||
import org.openqa.selenium.TakesScreenshot;
|
||||
import org.openqa.selenium.WebDriver;
|
||||
import org.openqa.selenium.WebElement;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
|
|
@ -85,6 +85,13 @@ public class OpenViduTestE2e {
|
|||
protected static String DOCKERHUB_PRIVATE_REGISTRY_PASSWORD = "not_valid";
|
||||
protected static String EXTERNAL_CUSTOM_LAYOUT_PARAMS = "sessionId,CUSTOM_LAYOUT_SESSION,secret,MY_SECRET";
|
||||
|
||||
protected static String AWS_REGION = "fakeRegion";
|
||||
protected static String AWS_ACCESS_KEY_ID = "fakeKey";
|
||||
protected static String AWS_SECRET_ACCESS_KEY = "fakeSecret";
|
||||
|
||||
protected static String OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_KEY = "fakeKey";
|
||||
protected static String OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_REGION = "fakeRegion";
|
||||
|
||||
// https://hub.docker.com/r/selenium/standalone-chrome/tags
|
||||
protected static String CHROME_VERSION = "latest";
|
||||
// https://hub.docker.com/r/selenium/standalone-firefox/tags
|
||||
|
@ -193,7 +200,8 @@ public class OpenViduTestE2e {
|
|||
private static GenericContainer<?> androidContainer(String image, long shmSize) {
|
||||
GenericContainer<?> android = new GenericContainer<>(DockerImageName.parse(image)).withPrivilegedMode(true)
|
||||
.withEnv(Map.of("DEVICE", "Samsung Galaxy S10", "APPIUM", "true", "APPIUM_HOST", "172.17.0.1",
|
||||
"APPIUM_PORT", "4723", "MOBILE_WEB_TEST", "true", "RELAXED_SECURITY", "true", "DATAPARTITION", "2500m"))
|
||||
"APPIUM_PORT", "4723", "MOBILE_WEB_TEST", "true", "RELAXED_SECURITY", "true", "DATAPARTITION",
|
||||
"2500m"))
|
||||
.withSharedMemorySize(shmSize).withExposedPorts(6080, 5554, 5555, 4723).waitingFor(waitAndroid)
|
||||
.withFileSystemBind("/opt/openvidu/android", "/opt/openvidu/android").withReuse(true);
|
||||
android.setPortBindings(Arrays.asList("6080:6080", "5554:5554", "5555:5555", "4723:4723"));
|
||||
|
@ -307,6 +315,31 @@ public class OpenViduTestE2e {
|
|||
OPENVIDU_PRO_SPEECH_TO_TEXT = openviduProSpeechToText;
|
||||
}
|
||||
|
||||
String awsRegion = System.getProperty("AWS_REGION");
|
||||
if (awsRegion != null) {
|
||||
AWS_REGION = awsRegion;
|
||||
}
|
||||
|
||||
String awsAccessKeyId = System.getProperty("AWS_ACCESS_KEY_ID");
|
||||
if (awsAccessKeyId != null) {
|
||||
AWS_ACCESS_KEY_ID = awsAccessKeyId;
|
||||
}
|
||||
|
||||
String awsSecretAccessKey = System.getProperty("AWS_SECRET_ACCESS_KEY");
|
||||
if (awsSecretAccessKey != null) {
|
||||
AWS_SECRET_ACCESS_KEY = awsSecretAccessKey;
|
||||
}
|
||||
|
||||
String azureKey = System.getProperty("OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_KEY");
|
||||
if (azureKey != null) {
|
||||
OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_KEY = azureKey;
|
||||
}
|
||||
|
||||
String azureRegion = System.getProperty("OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_REGION");
|
||||
if (azureRegion != null) {
|
||||
OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_REGION = azureRegion;
|
||||
}
|
||||
|
||||
String dockerhubPrivateRegistryPassword = System.getProperty("DOCKERHUB_PRIVATE_REGISTRY_PASSWORD");
|
||||
if (dockerhubPrivateRegistryPassword != null) {
|
||||
DOCKERHUB_PRIVATE_REGISTRY_PASSWORD = dockerhubPrivateRegistryPassword;
|
||||
|
|
|
@ -837,11 +837,6 @@ public class OpenViduProTestAppE2eTest extends AbstractOpenViduTestappE2eTest {
|
|||
"on_demand");
|
||||
restartOpenViduServer(config);
|
||||
|
||||
List<String> expectedRecognitionList = Arrays.asList(
|
||||
"for example we used to think that after childhood the brain did not really could not change and it turns out that nothing could be farther from the truth",
|
||||
"another misconception about the brain is that you only use parts of it at any given time and silent when you do nothing",
|
||||
"well this is also untrue it turns out that even when you are at rest and thinking of nothing your brain is highly active");
|
||||
|
||||
OpenViduTestappUser user = setupBrowserAndConnectToOpenViduTestapp("chromeFakeAudio");
|
||||
|
||||
user.getDriver().get(APP_URL);
|
||||
|
@ -853,85 +848,7 @@ public class OpenViduProTestAppE2eTest extends AbstractOpenViduTestappE2eTest {
|
|||
user.getEventManager().waitUntilEventReaches("streamCreated", 1);
|
||||
user.getEventManager().waitUntilEventReaches("streamPlaying", 1);
|
||||
|
||||
List<String> recognizingSttEvents = new ArrayList<String>();
|
||||
List<String> recognizedSttEvents = new ArrayList<String>();
|
||||
final CountDownLatch latch = new CountDownLatch(3);
|
||||
|
||||
boolean[] previousSttEventWasRecognized = new boolean[1];
|
||||
String[] previousSttRecognizedText = new String[1];
|
||||
AssertionError[] exc = new AssertionError[1];
|
||||
|
||||
user.getEventManager().on("speechToTextMessage", (event) -> {
|
||||
String reason = event.get("reason").getAsString();
|
||||
String text = event.get("text").getAsString();
|
||||
if ("recognizing".equals(reason)) {
|
||||
|
||||
previousSttEventWasRecognized[0] = false;
|
||||
previousSttRecognizedText[0] = null;
|
||||
recognizingSttEvents.add(text);
|
||||
|
||||
} else if ("recognized".equals(reason)) {
|
||||
|
||||
if (previousSttEventWasRecognized[0]) {
|
||||
exc[0] = exc[0] == null
|
||||
? new AssertionError("Two recognized events in a row should never happen. Present event: "
|
||||
+ event.get("text") + " | Previous event: \"" + previousSttRecognizedText[0] + "\"")
|
||||
: exc[0];
|
||||
while (latch.getCount() > 0) {
|
||||
latch.countDown();
|
||||
}
|
||||
}
|
||||
previousSttEventWasRecognized[0] = true;
|
||||
previousSttRecognizedText[0] = text;
|
||||
log.info("Recognized: {}", text);
|
||||
recognizedSttEvents.add(text);
|
||||
latch.countDown();
|
||||
|
||||
} else {
|
||||
|
||||
exc[0] = exc[0] == null ? new AssertionError("Unknown SpeechToText event 'reason' property " + reason)
|
||||
: exc[0];
|
||||
while (latch.getCount() > 0) {
|
||||
latch.countDown();
|
||||
}
|
||||
|
||||
}
|
||||
});
|
||||
|
||||
this.sttSubUser(user, 0, 0, "en-US", true, true);
|
||||
|
||||
if (!latch.await(80, TimeUnit.SECONDS)) {
|
||||
Assertions.fail("Timeout waiting for recognized STT events");
|
||||
}
|
||||
|
||||
if (exc[0] != null) {
|
||||
throw exc[0];
|
||||
}
|
||||
|
||||
Assertions.assertTrue(recognizingSttEvents.size() > 0, "recognizing STT events should be greater than 0");
|
||||
Assertions.assertTrue(recognizingSttEvents.size() > recognizedSttEvents.size(),
|
||||
"recognized STT events should be greater than 0");
|
||||
|
||||
// The expected text may be in just 2 recognized events instead of 3
|
||||
int expectedCharCount = expectedRecognitionList.stream().mapToInt(w -> w.length()).sum();
|
||||
int recognizedCharCount = recognizedSttEvents.stream().mapToInt(w -> w.length()).sum();
|
||||
int maxAllowedCountDifference = 50;
|
||||
if (recognizedCharCount > (expectedCharCount + maxAllowedCountDifference)) {
|
||||
recognizedSttEvents.remove(recognizedSttEvents.size() - 1);
|
||||
log.info("Removed one element of recognized collection!");
|
||||
}
|
||||
|
||||
String finalRecognition = String.join(" ", recognizedSttEvents).toLowerCase().replaceAll("[^a-z ]", "");
|
||||
String expectedRecognition = String.join(" ", expectedRecognitionList);
|
||||
|
||||
// Cosine similarity string comparison has been proven the most accurate one
|
||||
double cosineSimilarity = new Cosine().distance(finalRecognition, expectedRecognition);
|
||||
|
||||
log.info("Cosine similiarity: {}", cosineSimilarity);
|
||||
log.info(expectedRecognition);
|
||||
log.info(finalRecognition);
|
||||
Assertions.assertTrue(cosineSimilarity < 0.1,
|
||||
"Wrong similarity between actual and expected recognized text. Got " + cosineSimilarity);
|
||||
commonEnUsTranscriptionTest(user);
|
||||
|
||||
gracefullyLeaveParticipants(user, 1);
|
||||
}
|
||||
|
@ -2129,19 +2046,17 @@ public class OpenViduProTestAppE2eTest extends AbstractOpenViduTestappE2eTest {
|
|||
gracefullyLeaveParticipants(user, 2);
|
||||
}
|
||||
|
||||
// Try to unload via REST API a loaded model when on_demand. Should get a 405
|
||||
|
||||
@Test
|
||||
@DisplayName("Wrong AWS lang STT Test")
|
||||
void wrongAwsLangSttTest() throws Exception {
|
||||
@DisplayName("AWS lang STT Test")
|
||||
void awsLangSttTest() throws Exception {
|
||||
|
||||
log.info("Wrong AWS lang STT");
|
||||
log.info("AWS lang STT");
|
||||
|
||||
CustomHttpClient restClient = new CustomHttpClient(OPENVIDU_URL, "OPENVIDUAPP", OPENVIDU_SECRET);
|
||||
|
||||
Map<String, Object> config = Map.of("OPENVIDU_PRO_NETWORK_QUALITY", false, "OPENVIDU_PRO_SPEECH_TO_TEXT", "aws",
|
||||
"OPENVIDU_PRO_AWS_ACCESS_KEY", "fakekey", "OPENVIDU_PRO_AWS_SECRET_KEY", "fakekey",
|
||||
"OPENVIDU_PRO_AWS_REGION", "fakeregion");
|
||||
"OPENVIDU_PRO_AWS_ACCESS_KEY", AWS_ACCESS_KEY_ID, "OPENVIDU_PRO_AWS_SECRET_KEY", AWS_SECRET_ACCESS_KEY,
|
||||
"OPENVIDU_PRO_AWS_REGION", AWS_REGION);
|
||||
restartOpenViduServer(config);
|
||||
|
||||
String body = "{'lang': 'en-US', 'mediaNode': {'id': 'NOT_EXISTS'}}";
|
||||
|
@ -2155,22 +2070,25 @@ public class OpenViduProTestAppE2eTest extends AbstractOpenViduTestappE2eTest {
|
|||
user.getEventManager().waitUntilEventReaches("streamCreated", 1);
|
||||
user.getEventManager().waitUntilEventReaches("streamPlaying", 1);
|
||||
|
||||
sttSubUser(user, 0, 0, "no-EXIST", true, false, "AWS Transcribe does not support language \"no-EXIST\"", false);
|
||||
commonEnUsTranscriptionTest(user);
|
||||
|
||||
// Test non-existing language
|
||||
sttSubUser(user, 0, 0, "no-EXIST", true, true, "AWS Transcribe does not support language \"no-EXIST\"", false);
|
||||
|
||||
gracefullyLeaveParticipants(user, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
@DisplayName("Azure lang STT Test")
|
||||
void wrongAzureLangSttTest() throws Exception {
|
||||
void azureLangSttTest() throws Exception {
|
||||
|
||||
log.info("Wrong AWS lang STT");
|
||||
log.info("Azure lang STT");
|
||||
|
||||
CustomHttpClient restClient = new CustomHttpClient(OPENVIDU_URL, "OPENVIDUAPP", OPENVIDU_SECRET);
|
||||
|
||||
Map<String, Object> config = Map.of("OPENVIDU_PRO_NETWORK_QUALITY", false, "OPENVIDU_PRO_SPEECH_TO_TEXT",
|
||||
"azure", "OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_KEY", "fakekey", "OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_REGION",
|
||||
"fakeregion");
|
||||
"azure", "OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_KEY", OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_KEY,
|
||||
"OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_REGION", OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_REGION);
|
||||
restartOpenViduServer(config);
|
||||
|
||||
String body = "{'lang': 'en-US', 'mediaNode': {'id': 'NOT_EXISTS'}}";
|
||||
|
@ -2184,7 +2102,10 @@ public class OpenViduProTestAppE2eTest extends AbstractOpenViduTestappE2eTest {
|
|||
user.getEventManager().waitUntilEventReaches("streamCreated", 1);
|
||||
user.getEventManager().waitUntilEventReaches("streamPlaying", 1);
|
||||
|
||||
sttSubUser(user, 0, 0, "no-EXIST", true, false, "Azure Speech to Text does not support language \"no-EXIST\"",
|
||||
commonEnUsTranscriptionTest(user);
|
||||
|
||||
// Test non-existing language
|
||||
sttSubUser(user, 0, 0, "no-EXIST", true, true, "Azure Speech to Text does not support language \"no-EXIST\"",
|
||||
false);
|
||||
|
||||
gracefullyLeaveParticipants(user, 1);
|
||||
|
@ -2396,4 +2317,93 @@ public class OpenViduProTestAppE2eTest extends AbstractOpenViduTestappE2eTest {
|
|||
commandLine.executeCommand(restartCommand, 30);
|
||||
}
|
||||
|
||||
private void commonEnUsTranscriptionTest(OpenViduTestappUser user) throws Exception {
|
||||
List<String> expectedRecognitionList = Arrays.asList(
|
||||
"for example we used to think that after childhood the brain did not really could not change and it turns out that nothing could be farther from the truth",
|
||||
"another misconception about the brain is that you only use parts of it at any given time and silent when you do nothing",
|
||||
"well this is also untrue it turns out that even when you are at rest and thinking of nothing your brain is highly active");
|
||||
|
||||
List<String> recognizingSttEvents = new ArrayList<String>();
|
||||
List<String> recognizedSttEvents = new ArrayList<String>();
|
||||
final CountDownLatch latch = new CountDownLatch(3);
|
||||
|
||||
boolean[] previousSttEventWasRecognized = new boolean[1];
|
||||
String[] previousSttRecognizedText = new String[1];
|
||||
AssertionError[] exc = new AssertionError[1];
|
||||
|
||||
user.getEventManager().on("speechToTextMessage", (event) -> {
|
||||
String reason = event.get("reason").getAsString();
|
||||
String text = event.get("text").getAsString();
|
||||
if ("recognizing".equals(reason)) {
|
||||
|
||||
previousSttEventWasRecognized[0] = false;
|
||||
previousSttRecognizedText[0] = null;
|
||||
recognizingSttEvents.add(text);
|
||||
|
||||
} else if ("recognized".equals(reason)) {
|
||||
|
||||
if (previousSttEventWasRecognized[0]) {
|
||||
exc[0] = exc[0] == null
|
||||
? new AssertionError("Two recognized events in a row should never happen. Present event: "
|
||||
+ event.get("text") + " | Previous event: \"" + previousSttRecognizedText[0] + "\"")
|
||||
: exc[0];
|
||||
while (latch.getCount() > 0) {
|
||||
latch.countDown();
|
||||
}
|
||||
}
|
||||
previousSttEventWasRecognized[0] = true;
|
||||
previousSttRecognizedText[0] = text;
|
||||
log.info("Recognized: {}", text);
|
||||
recognizedSttEvents.add(text);
|
||||
latch.countDown();
|
||||
|
||||
} else {
|
||||
|
||||
exc[0] = exc[0] == null ? new AssertionError("Unknown SpeechToText event 'reason' property " + reason)
|
||||
: exc[0];
|
||||
while (latch.getCount() > 0) {
|
||||
latch.countDown();
|
||||
}
|
||||
|
||||
}
|
||||
});
|
||||
|
||||
this.sttSubUser(user, 0, 0, "en-US", true, false);
|
||||
|
||||
if (!latch.await(80, TimeUnit.SECONDS)) {
|
||||
Assertions.fail("Timeout waiting for recognized STT events");
|
||||
}
|
||||
|
||||
if (exc[0] != null) {
|
||||
throw exc[0];
|
||||
}
|
||||
|
||||
Assertions.assertTrue(recognizingSttEvents.size() > 0, "recognizing STT events should be greater than 0");
|
||||
Assertions.assertTrue(recognizingSttEvents.size() > recognizedSttEvents.size(),
|
||||
"recognized STT events should be greater than 0");
|
||||
|
||||
// The expected text may be in just 2 recognized events instead of 3
|
||||
int expectedCharCount = expectedRecognitionList.stream().mapToInt(w -> w.length()).sum();
|
||||
int recognizedCharCount = recognizedSttEvents.stream().mapToInt(w -> w.length()).sum();
|
||||
int maxAllowedCountDifference = 50;
|
||||
if (recognizedCharCount > (expectedCharCount + maxAllowedCountDifference)) {
|
||||
recognizedSttEvents.remove(recognizedSttEvents.size() - 1);
|
||||
log.info("Removed one element of recognized collection!");
|
||||
}
|
||||
|
||||
String finalRecognition = String.join(" ", recognizedSttEvents).toLowerCase().replaceAll("[^a-z ]", "");
|
||||
String expectedRecognition = String.join(" ", expectedRecognitionList);
|
||||
|
||||
// Cosine similarity string comparison has been proven the most accurate one
|
||||
double cosineSimilarity = new Cosine().distance(finalRecognition, expectedRecognition);
|
||||
|
||||
log.info("Cosine similiarity: {}", cosineSimilarity);
|
||||
log.info(expectedRecognition);
|
||||
log.info(finalRecognition);
|
||||
Assertions.assertTrue(cosineSimilarity < 0.1,
|
||||
"Wrong similarity between actual and expected recognized text. Got " + cosineSimilarity);
|
||||
|
||||
sttUnsubUser(user, 0, 0, false, true);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue