Azure and AWS STT tests

pull/786/head
pabloFuente 2023-01-24 17:06:02 +01:00
parent 2043c33761
commit 7bd2ad5868
3 changed files with 141 additions and 102 deletions

View File

@ -19,12 +19,9 @@ package io.openvidu.test.e2e;
import static org.openqa.selenium.OutputType.BASE64;
import java.awt.Point;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.ConcurrentHashMap;
@ -41,7 +38,6 @@ import java.util.function.Consumer;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.TakesScreenshot;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -85,6 +85,13 @@ public class OpenViduTestE2e {
protected static String DOCKERHUB_PRIVATE_REGISTRY_PASSWORD = "not_valid";
protected static String EXTERNAL_CUSTOM_LAYOUT_PARAMS = "sessionId,CUSTOM_LAYOUT_SESSION,secret,MY_SECRET";
protected static String AWS_REGION = "fakeRegion";
protected static String AWS_ACCESS_KEY_ID = "fakeKey";
protected static String AWS_SECRET_ACCESS_KEY = "fakeSecret";
protected static String OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_KEY = "fakeKey";
protected static String OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_REGION = "fakeRegion";
// https://hub.docker.com/r/selenium/standalone-chrome/tags
protected static String CHROME_VERSION = "latest";
// https://hub.docker.com/r/selenium/standalone-firefox/tags
@ -193,7 +200,8 @@ public class OpenViduTestE2e {
private static GenericContainer<?> androidContainer(String image, long shmSize) {
GenericContainer<?> android = new GenericContainer<>(DockerImageName.parse(image)).withPrivilegedMode(true)
.withEnv(Map.of("DEVICE", "Samsung Galaxy S10", "APPIUM", "true", "APPIUM_HOST", "172.17.0.1",
"APPIUM_PORT", "4723", "MOBILE_WEB_TEST", "true", "RELAXED_SECURITY", "true", "DATAPARTITION", "2500m"))
"APPIUM_PORT", "4723", "MOBILE_WEB_TEST", "true", "RELAXED_SECURITY", "true", "DATAPARTITION",
"2500m"))
.withSharedMemorySize(shmSize).withExposedPorts(6080, 5554, 5555, 4723).waitingFor(waitAndroid)
.withFileSystemBind("/opt/openvidu/android", "/opt/openvidu/android").withReuse(true);
android.setPortBindings(Arrays.asList("6080:6080", "5554:5554", "5555:5555", "4723:4723"));
@ -307,6 +315,31 @@ public class OpenViduTestE2e {
OPENVIDU_PRO_SPEECH_TO_TEXT = openviduProSpeechToText;
}
String awsRegion = System.getProperty("AWS_REGION");
if (awsRegion != null) {
AWS_REGION = awsRegion;
}
String awsAccessKeyId = System.getProperty("AWS_ACCESS_KEY_ID");
if (awsAccessKeyId != null) {
AWS_ACCESS_KEY_ID = awsAccessKeyId;
}
String awsSecretAccessKey = System.getProperty("AWS_SECRET_ACCESS_KEY");
if (awsSecretAccessKey != null) {
AWS_SECRET_ACCESS_KEY = awsSecretAccessKey;
}
String azureKey = System.getProperty("OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_KEY");
if (azureKey != null) {
OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_KEY = azureKey;
}
String azureRegion = System.getProperty("OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_REGION");
if (azureRegion != null) {
OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_REGION = azureRegion;
}
String dockerhubPrivateRegistryPassword = System.getProperty("DOCKERHUB_PRIVATE_REGISTRY_PASSWORD");
if (dockerhubPrivateRegistryPassword != null) {
DOCKERHUB_PRIVATE_REGISTRY_PASSWORD = dockerhubPrivateRegistryPassword;

View File

@ -837,11 +837,6 @@ public class OpenViduProTestAppE2eTest extends AbstractOpenViduTestappE2eTest {
"on_demand");
restartOpenViduServer(config);
List<String> expectedRecognitionList = Arrays.asList(
"for example we used to think that after childhood the brain did not really could not change and it turns out that nothing could be farther from the truth",
"another misconception about the brain is that you only use parts of it at any given time and silent when you do nothing",
"well this is also untrue it turns out that even when you are at rest and thinking of nothing your brain is highly active");
OpenViduTestappUser user = setupBrowserAndConnectToOpenViduTestapp("chromeFakeAudio");
user.getDriver().get(APP_URL);
@ -853,85 +848,7 @@ public class OpenViduProTestAppE2eTest extends AbstractOpenViduTestappE2eTest {
user.getEventManager().waitUntilEventReaches("streamCreated", 1);
user.getEventManager().waitUntilEventReaches("streamPlaying", 1);
List<String> recognizingSttEvents = new ArrayList<String>();
List<String> recognizedSttEvents = new ArrayList<String>();
final CountDownLatch latch = new CountDownLatch(3);
boolean[] previousSttEventWasRecognized = new boolean[1];
String[] previousSttRecognizedText = new String[1];
AssertionError[] exc = new AssertionError[1];
user.getEventManager().on("speechToTextMessage", (event) -> {
String reason = event.get("reason").getAsString();
String text = event.get("text").getAsString();
if ("recognizing".equals(reason)) {
previousSttEventWasRecognized[0] = false;
previousSttRecognizedText[0] = null;
recognizingSttEvents.add(text);
} else if ("recognized".equals(reason)) {
if (previousSttEventWasRecognized[0]) {
exc[0] = exc[0] == null
? new AssertionError("Two recognized events in a row should never happen. Present event: "
+ event.get("text") + " | Previous event: \"" + previousSttRecognizedText[0] + "\"")
: exc[0];
while (latch.getCount() > 0) {
latch.countDown();
}
}
previousSttEventWasRecognized[0] = true;
previousSttRecognizedText[0] = text;
log.info("Recognized: {}", text);
recognizedSttEvents.add(text);
latch.countDown();
} else {
exc[0] = exc[0] == null ? new AssertionError("Unknown SpeechToText event 'reason' property " + reason)
: exc[0];
while (latch.getCount() > 0) {
latch.countDown();
}
}
});
this.sttSubUser(user, 0, 0, "en-US", true, true);
if (!latch.await(80, TimeUnit.SECONDS)) {
Assertions.fail("Timeout waiting for recognized STT events");
}
if (exc[0] != null) {
throw exc[0];
}
Assertions.assertTrue(recognizingSttEvents.size() > 0, "recognizing STT events should be greater than 0");
Assertions.assertTrue(recognizingSttEvents.size() > recognizedSttEvents.size(),
"recognized STT events should be greater than 0");
// The expected text may be in just 2 recognized events instead of 3
int expectedCharCount = expectedRecognitionList.stream().mapToInt(w -> w.length()).sum();
int recognizedCharCount = recognizedSttEvents.stream().mapToInt(w -> w.length()).sum();
int maxAllowedCountDifference = 50;
if (recognizedCharCount > (expectedCharCount + maxAllowedCountDifference)) {
recognizedSttEvents.remove(recognizedSttEvents.size() - 1);
log.info("Removed one element of recognized collection!");
}
String finalRecognition = String.join(" ", recognizedSttEvents).toLowerCase().replaceAll("[^a-z ]", "");
String expectedRecognition = String.join(" ", expectedRecognitionList);
// Cosine similarity string comparison has been proven the most accurate one
double cosineSimilarity = new Cosine().distance(finalRecognition, expectedRecognition);
log.info("Cosine similiarity: {}", cosineSimilarity);
log.info(expectedRecognition);
log.info(finalRecognition);
Assertions.assertTrue(cosineSimilarity < 0.1,
"Wrong similarity between actual and expected recognized text. Got " + cosineSimilarity);
commonEnUsTranscriptionTest(user);
gracefullyLeaveParticipants(user, 1);
}
@ -2129,19 +2046,17 @@ public class OpenViduProTestAppE2eTest extends AbstractOpenViduTestappE2eTest {
gracefullyLeaveParticipants(user, 2);
}
// Try to unload via REST API a loaded model when on_demand. Should get a 405
@Test
@DisplayName("Wrong AWS lang STT Test")
void wrongAwsLangSttTest() throws Exception {
@DisplayName("AWS lang STT Test")
void awsLangSttTest() throws Exception {
log.info("Wrong AWS lang STT");
log.info("AWS lang STT");
CustomHttpClient restClient = new CustomHttpClient(OPENVIDU_URL, "OPENVIDUAPP", OPENVIDU_SECRET);
Map<String, Object> config = Map.of("OPENVIDU_PRO_NETWORK_QUALITY", false, "OPENVIDU_PRO_SPEECH_TO_TEXT", "aws",
"OPENVIDU_PRO_AWS_ACCESS_KEY", "fakekey", "OPENVIDU_PRO_AWS_SECRET_KEY", "fakekey",
"OPENVIDU_PRO_AWS_REGION", "fakeregion");
"OPENVIDU_PRO_AWS_ACCESS_KEY", AWS_ACCESS_KEY_ID, "OPENVIDU_PRO_AWS_SECRET_KEY", AWS_SECRET_ACCESS_KEY,
"OPENVIDU_PRO_AWS_REGION", AWS_REGION);
restartOpenViduServer(config);
String body = "{'lang': 'en-US', 'mediaNode': {'id': 'NOT_EXISTS'}}";
@ -2155,22 +2070,25 @@ public class OpenViduProTestAppE2eTest extends AbstractOpenViduTestappE2eTest {
user.getEventManager().waitUntilEventReaches("streamCreated", 1);
user.getEventManager().waitUntilEventReaches("streamPlaying", 1);
sttSubUser(user, 0, 0, "no-EXIST", true, false, "AWS Transcribe does not support language \"no-EXIST\"", false);
commonEnUsTranscriptionTest(user);
// Test non-existing language
sttSubUser(user, 0, 0, "no-EXIST", true, true, "AWS Transcribe does not support language \"no-EXIST\"", false);
gracefullyLeaveParticipants(user, 1);
}
@Test
@DisplayName("Azure lang STT Test")
void wrongAzureLangSttTest() throws Exception {
void azureLangSttTest() throws Exception {
log.info("Wrong AWS lang STT");
log.info("Azure lang STT");
CustomHttpClient restClient = new CustomHttpClient(OPENVIDU_URL, "OPENVIDUAPP", OPENVIDU_SECRET);
Map<String, Object> config = Map.of("OPENVIDU_PRO_NETWORK_QUALITY", false, "OPENVIDU_PRO_SPEECH_TO_TEXT",
"azure", "OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_KEY", "fakekey", "OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_REGION",
"fakeregion");
"azure", "OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_KEY", OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_KEY,
"OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_REGION", OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_REGION);
restartOpenViduServer(config);
String body = "{'lang': 'en-US', 'mediaNode': {'id': 'NOT_EXISTS'}}";
@ -2184,7 +2102,10 @@ public class OpenViduProTestAppE2eTest extends AbstractOpenViduTestappE2eTest {
user.getEventManager().waitUntilEventReaches("streamCreated", 1);
user.getEventManager().waitUntilEventReaches("streamPlaying", 1);
sttSubUser(user, 0, 0, "no-EXIST", true, false, "Azure Speech to Text does not support language \"no-EXIST\"",
commonEnUsTranscriptionTest(user);
// Test non-existing language
sttSubUser(user, 0, 0, "no-EXIST", true, true, "Azure Speech to Text does not support language \"no-EXIST\"",
false);
gracefullyLeaveParticipants(user, 1);
@ -2396,4 +2317,93 @@ public class OpenViduProTestAppE2eTest extends AbstractOpenViduTestappE2eTest {
commandLine.executeCommand(restartCommand, 30);
}
/**
 * Shared en-US Speech To Text check used by the STT tests.
 *
 * Registers a "speechToTextMessage" listener on the user's event manager,
 * subscribes to STT with language "en-US", waits up to 80 seconds for three
 * "recognized" events and then compares the recognized text with the expected
 * transcription using {@code new Cosine().distance(...)}, which must stay
 * below 0.1. Finally it unsubscribes from STT.
 *
 * @param user the test application user whose event manager receives the STT
 *             events
 * @throws Exception if waiting on the latch is interrupted or any of the
 *                   subscribe/unsubscribe helpers fail
 */
private void commonEnUsTranscriptionTest(OpenViduTestappUser user) throws Exception {

    List<String> expectedRecognitionList = Arrays.asList(
            "for example we used to think that after childhood the brain did not really could not change and it turns out that nothing could be farther from the truth",
            "another misconception about the brain is that you only use parts of it at any given time and silent when you do nothing",
            "well this is also untrue it turns out that even when you are at rest and thinking of nothing your brain is highly active");

    List<String> recognizingSttEvents = new ArrayList<>();
    List<String> recognizedSttEvents = new ArrayList<>();
    // One count per expected "recognized" event
    final CountDownLatch latch = new CountDownLatch(3);
    // Single-element arrays so the lambda below can mutate these values
    boolean[] previousSttEventWasRecognized = new boolean[1];
    String[] previousSttRecognizedText = new String[1];
    AssertionError[] exc = new AssertionError[1];

    // NOTE(review): this listener is never removed in this method, so it may keep
    // appending to the lists while they are read below - confirm the event
    // manager's threading model
    user.getEventManager().on("speechToTextMessage", (event) -> {
        String reason = event.get("reason").getAsString();
        String text = event.get("text").getAsString();
        if ("recognizing".equals(reason)) {
            previousSttEventWasRecognized[0] = false;
            previousSttRecognizedText[0] = null;
            recognizingSttEvents.add(text);
        } else if ("recognized".equals(reason)) {
            if (previousSttEventWasRecognized[0]) {
                recordSttFailureAndReleaseLatch(exc, latch,
                        "Two recognized events in a row should never happen. Present event: " + event.get("text")
                                + " | Previous event: \"" + previousSttRecognizedText[0] + "\"");
            }
            previousSttEventWasRecognized[0] = true;
            previousSttRecognizedText[0] = text;
            log.info("Recognized: {}", text);
            recognizedSttEvents.add(text);
            latch.countDown();
        } else {
            recordSttFailureAndReleaseLatch(exc, latch, "Unknown SpeechToText event 'reason' property " + reason);
        }
    });

    this.sttSubUser(user, 0, 0, "en-US", true, false);

    if (!latch.await(80, TimeUnit.SECONDS)) {
        Assertions.fail("Timeout waiting for recognized STT events");
    }
    // Rethrow on the test thread any failure detected inside the listener
    if (exc[0] != null) {
        throw exc[0];
    }

    Assertions.assertTrue(recognizingSttEvents.size() > 0, "recognizing STT events should be greater than 0");
    Assertions.assertTrue(recognizingSttEvents.size() > recognizedSttEvents.size(),
            "recognizing STT events should be greater than recognized STT events");

    // The expected text may be in just 2 recognized events instead of 3
    int expectedCharCount = expectedRecognitionList.stream().mapToInt(String::length).sum();
    int recognizedCharCount = recognizedSttEvents.stream().mapToInt(String::length).sum();
    int maxAllowedCountDifference = 50;
    if (recognizedCharCount > (expectedCharCount + maxAllowedCountDifference)) {
        recognizedSttEvents.remove(recognizedSttEvents.size() - 1);
        log.info("Removed one element of recognized collection!");
    }

    // Normalize to lowercase letters and spaces only, the same shape as the
    // expected sentences
    String finalRecognition = String.join(" ", recognizedSttEvents).toLowerCase().replaceAll("[^a-z ]", "");
    String expectedRecognition = String.join(" ", expectedRecognitionList);

    // Cosine similarity string comparison has been proven the most accurate one
    double cosineSimilarity = new Cosine().distance(finalRecognition, expectedRecognition);

    log.info("Cosine similiarity: {}", cosineSimilarity);
    log.info(expectedRecognition);
    log.info(finalRecognition);
    Assertions.assertTrue(cosineSimilarity < 0.1,
            "Wrong similarity between actual and expected recognized text. Got " + cosineSimilarity);

    sttUnsubUser(user, 0, 0, false, true);
}

/**
 * Stores the first failure detected by the STT listener and drains the latch
 * so the waiting test thread is released immediately.
 *
 * @param failure single-element holder; only written if it is still empty
 * @param latch   latch the test thread is awaiting
 * @param message detail message for the {@link AssertionError}
 */
private static void recordSttFailureAndReleaseLatch(AssertionError[] failure, CountDownLatch latch,
        String message) {
    if (failure[0] == null) {
        failure[0] = new AssertionError(message);
    }
    while (latch.getCount() > 0) {
        latch.countDown();
    }
}
}