Azure and AWS STT tests

2023-01-24 17:06:02 +01:00 · 2023-01-24 17:06:02 +01:00 · 7bd2ad5868
parent 2043c33761
commit 7bd2ad5868
3 changed files with 141 additions and 102 deletions
--- a/openvidu-test-e2e/src/main/java/io/openvidu/test/e2e/OpenViduEventManager.java
+++ b/openvidu-test-e2e/src/main/java/io/openvidu/test/e2e/OpenViduEventManager.java
@ -19,12 +19,9 @@ package io.openvidu.test.e2e;

 import static org.openqa.selenium.OutputType.BASE64;

-import java.awt.Point;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
 import java.util.Map;
 import java.util.Queue;
 import java.util.concurrent.ConcurrentHashMap;
@ -41,7 +38,6 @@ import java.util.function.Consumer;
 import org.openqa.selenium.JavascriptExecutor;
 import org.openqa.selenium.TakesScreenshot;
 import org.openqa.selenium.WebDriver;
-import org.openqa.selenium.WebElement;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

--- a/openvidu-test-e2e/src/main/java/io/openvidu/test/e2e/OpenViduTestE2e.java
+++ b/openvidu-test-e2e/src/main/java/io/openvidu/test/e2e/OpenViduTestE2e.java
@ -85,6 +85,13 @@ public class OpenViduTestE2e {
 	protected static String DOCKERHUB_PRIVATE_REGISTRY_PASSWORD = "not_valid";
 	protected static String EXTERNAL_CUSTOM_LAYOUT_PARAMS = "sessionId,CUSTOM_LAYOUT_SESSION,secret,MY_SECRET";

+	protected static String AWS_REGION = "fakeRegion";
+	protected static String AWS_ACCESS_KEY_ID = "fakeKey";
+	protected static String AWS_SECRET_ACCESS_KEY = "fakeSecret";
+
+	protected static String OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_KEY = "fakeKey";
+	protected static String OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_REGION = "fakeRegion";
+
 	// https://hub.docker.com/r/selenium/standalone-chrome/tags
 	protected static String CHROME_VERSION = "latest";
 	// https://hub.docker.com/r/selenium/standalone-firefox/tags
@ -193,7 +200,8 @@ public class OpenViduTestE2e {
 	private static GenericContainer<?> androidContainer(String image, long shmSize) {
 		GenericContainer<?> android = new GenericContainer<>(DockerImageName.parse(image)).withPrivilegedMode(true)
 				.withEnv(Map.of("DEVICE", "Samsung Galaxy S10", "APPIUM", "true", "APPIUM_HOST", "172.17.0.1",
-						"APPIUM_PORT", "4723", "MOBILE_WEB_TEST", "true", "RELAXED_SECURITY", "true", "DATAPARTITION", "2500m"))
+						"APPIUM_PORT", "4723", "MOBILE_WEB_TEST", "true", "RELAXED_SECURITY", "true", "DATAPARTITION",
+						"2500m"))
 				.withSharedMemorySize(shmSize).withExposedPorts(6080, 5554, 5555, 4723).waitingFor(waitAndroid)
 				.withFileSystemBind("/opt/openvidu/android", "/opt/openvidu/android").withReuse(true);
 		android.setPortBindings(Arrays.asList("6080:6080", "5554:5554", "5555:5555", "4723:4723"));
@ -307,6 +315,31 @@ public class OpenViduTestE2e {
 			OPENVIDU_PRO_SPEECH_TO_TEXT = openviduProSpeechToText;
 		}

+		String awsRegion = System.getProperty("AWS_REGION");
+		if (awsRegion != null) {
+			AWS_REGION = awsRegion;
+		}
+
+		String awsAccessKeyId = System.getProperty("AWS_ACCESS_KEY_ID");
+		if (awsAccessKeyId != null) {
+			AWS_ACCESS_KEY_ID = awsAccessKeyId;
+		}
+
+		String awsSecretAccessKey = System.getProperty("AWS_SECRET_ACCESS_KEY");
+		if (awsSecretAccessKey != null) {
+			AWS_SECRET_ACCESS_KEY = awsSecretAccessKey;
+		}
+
+		String azureKey = System.getProperty("OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_KEY");
+		if (azureKey != null) {
+			OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_KEY = azureKey;
+		}
+
+		String azureRegion = System.getProperty("OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_REGION");
+		if (azureRegion != null) {
+			OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_REGION = azureRegion;
+		}
+
 		String dockerhubPrivateRegistryPassword = System.getProperty("DOCKERHUB_PRIVATE_REGISTRY_PASSWORD");
 		if (dockerhubPrivateRegistryPassword != null) {
 			DOCKERHUB_PRIVATE_REGISTRY_PASSWORD = dockerhubPrivateRegistryPassword;
--- a/openvidu-test-e2e/src/test/java/io/openvidu/test/e2e/OpenViduProTestAppE2eTest.java
+++ b/openvidu-test-e2e/src/test/java/io/openvidu/test/e2e/OpenViduProTestAppE2eTest.java
@ -837,11 +837,6 @@ public class OpenViduProTestAppE2eTest extends AbstractOpenViduTestappE2eTest {
 				"on_demand");
 		restartOpenViduServer(config);

-		List<String> expectedRecognitionList = Arrays.asList(
-				"for example we used to think that after childhood the brain did not really could not change and it turns out that nothing could be farther from the truth",
-				"another misconception about the brain is that you only use parts of it at any given time and silent when you do nothing",
-				"well this is also untrue it turns out that even when you are at rest and thinking of nothing your brain is highly active");
-
 		OpenViduTestappUser user = setupBrowserAndConnectToOpenViduTestapp("chromeFakeAudio");

 		user.getDriver().get(APP_URL);
@ -853,85 +848,7 @@ public class OpenViduProTestAppE2eTest extends AbstractOpenViduTestappE2eTest {
 		user.getEventManager().waitUntilEventReaches("streamCreated", 1);
 		user.getEventManager().waitUntilEventReaches("streamPlaying", 1);

-		List<String> recognizingSttEvents = new ArrayList<String>();
-		List<String> recognizedSttEvents = new ArrayList<String>();
-		final CountDownLatch latch = new CountDownLatch(3);
-
-		boolean[] previousSttEventWasRecognized = new boolean[1];
-		String[] previousSttRecognizedText = new String[1];
-		AssertionError[] exc = new AssertionError[1];
-
-		user.getEventManager().on("speechToTextMessage", (event) -> {
-			String reason = event.get("reason").getAsString();
-			String text = event.get("text").getAsString();
-			if ("recognizing".equals(reason)) {
-
-				previousSttEventWasRecognized[0] = false;
-				previousSttRecognizedText[0] = null;
-				recognizingSttEvents.add(text);
-
-			} else if ("recognized".equals(reason)) {
-
-				if (previousSttEventWasRecognized[0]) {
-					exc[0] = exc[0] == null
-							? new AssertionError("Two recognized events in a row should never happen. Present event: "
-									+ event.get("text") + " | Previous event: \"" + previousSttRecognizedText[0] + "\"")
-							: exc[0];
-					while (latch.getCount() > 0) {
-						latch.countDown();
-					}
-				}
-				previousSttEventWasRecognized[0] = true;
-				previousSttRecognizedText[0] = text;
-				log.info("Recognized: {}", text);
-				recognizedSttEvents.add(text);
-				latch.countDown();
-
-			} else {
-
-				exc[0] = exc[0] == null ? new AssertionError("Unknown SpeechToText event 'reason' property " + reason)
-						: exc[0];
-				while (latch.getCount() > 0) {
-					latch.countDown();
-				}
-
-			}
-		});
-
-		this.sttSubUser(user, 0, 0, "en-US", true, true);
-
-		if (!latch.await(80, TimeUnit.SECONDS)) {
-			Assertions.fail("Timeout waiting for recognized STT events");
-		}
-
-		if (exc[0] != null) {
-			throw exc[0];
-		}
-
-		Assertions.assertTrue(recognizingSttEvents.size() > 0, "recognizing STT events should be greater than 0");
-		Assertions.assertTrue(recognizingSttEvents.size() > recognizedSttEvents.size(),
-				"recognized STT events should be greater than 0");
-
-		// The expected text may be in just 2 recognized events instead of 3
-		int expectedCharCount = expectedRecognitionList.stream().mapToInt(w -> w.length()).sum();
-		int recognizedCharCount = recognizedSttEvents.stream().mapToInt(w -> w.length()).sum();
-		int maxAllowedCountDifference = 50;
-		if (recognizedCharCount > (expectedCharCount + maxAllowedCountDifference)) {
-			recognizedSttEvents.remove(recognizedSttEvents.size() - 1);
-			log.info("Removed one element of recognized collection!");
-		}
-
-		String finalRecognition = String.join(" ", recognizedSttEvents).toLowerCase().replaceAll("[^a-z ]", "");
-		String expectedRecognition = String.join(" ", expectedRecognitionList);
-
-		// Cosine similarity string comparison has been proven the most accurate one
-		double cosineSimilarity = new Cosine().distance(finalRecognition, expectedRecognition);
-
-		log.info("Cosine similiarity: {}", cosineSimilarity);
-		log.info(expectedRecognition);
-		log.info(finalRecognition);
-		Assertions.assertTrue(cosineSimilarity < 0.1,
-				"Wrong similarity between actual and expected recognized text. Got " + cosineSimilarity);
+		commonEnUsTranscriptionTest(user);

 		gracefullyLeaveParticipants(user, 1);
 	}
@ -2129,19 +2046,17 @@ public class OpenViduProTestAppE2eTest extends AbstractOpenViduTestappE2eTest {
 		gracefullyLeaveParticipants(user, 2);
 	}

-	// Try to unload via REST API a loaded model when on_demand. Should get a 405
-
 	@Test
-	@DisplayName("Wrong AWS lang STT Test")
-	void wrongAwsLangSttTest() throws Exception {
+	@DisplayName("AWS lang STT Test")
+	void awsLangSttTest() throws Exception {

-		log.info("Wrong AWS lang STT");
+		log.info("AWS lang STT");

 		CustomHttpClient restClient = new CustomHttpClient(OPENVIDU_URL, "OPENVIDUAPP", OPENVIDU_SECRET);

 		Map<String, Object> config = Map.of("OPENVIDU_PRO_NETWORK_QUALITY", false, "OPENVIDU_PRO_SPEECH_TO_TEXT", "aws",
-				"OPENVIDU_PRO_AWS_ACCESS_KEY", "fakekey", "OPENVIDU_PRO_AWS_SECRET_KEY", "fakekey",
-				"OPENVIDU_PRO_AWS_REGION", "fakeregion");
+				"OPENVIDU_PRO_AWS_ACCESS_KEY", AWS_ACCESS_KEY_ID, "OPENVIDU_PRO_AWS_SECRET_KEY", AWS_SECRET_ACCESS_KEY,
+				"OPENVIDU_PRO_AWS_REGION", AWS_REGION);
 		restartOpenViduServer(config);

 		String body = "{'lang': 'en-US', 'mediaNode': {'id': 'NOT_EXISTS'}}";
@ -2155,22 +2070,25 @@ public class OpenViduProTestAppE2eTest extends AbstractOpenViduTestappE2eTest {
 		user.getEventManager().waitUntilEventReaches("streamCreated", 1);
 		user.getEventManager().waitUntilEventReaches("streamPlaying", 1);

-		sttSubUser(user, 0, 0, "no-EXIST", true, false, "AWS Transcribe does not support language \"no-EXIST\"", false);
+		commonEnUsTranscriptionTest(user);
+
+		// Test non-existing language
+		sttSubUser(user, 0, 0, "no-EXIST", true, true, "AWS Transcribe does not support language \"no-EXIST\"", false);

 		gracefullyLeaveParticipants(user, 1);
 	}

 	@Test
 	@DisplayName("Azure lang STT Test")
-	void wrongAzureLangSttTest() throws Exception {
+	void azureLangSttTest() throws Exception {

-		log.info("Wrong AWS lang STT");
+		log.info("Azure lang STT");

 		CustomHttpClient restClient = new CustomHttpClient(OPENVIDU_URL, "OPENVIDUAPP", OPENVIDU_SECRET);

 		Map<String, Object> config = Map.of("OPENVIDU_PRO_NETWORK_QUALITY", false, "OPENVIDU_PRO_SPEECH_TO_TEXT",
-				"azure", "OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_KEY", "fakekey", "OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_REGION",
-				"fakeregion");
+				"azure", "OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_KEY", OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_KEY,
+				"OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_REGION", OPENVIDU_PRO_SPEECH_TO_TEXT_AZURE_REGION);
 		restartOpenViduServer(config);

 		String body = "{'lang': 'en-US', 'mediaNode': {'id': 'NOT_EXISTS'}}";
@ -2184,7 +2102,10 @@ public class OpenViduProTestAppE2eTest extends AbstractOpenViduTestappE2eTest {
 		user.getEventManager().waitUntilEventReaches("streamCreated", 1);
 		user.getEventManager().waitUntilEventReaches("streamPlaying", 1);

-		sttSubUser(user, 0, 0, "no-EXIST", true, false, "Azure Speech to Text does not support language \"no-EXIST\"",
+		commonEnUsTranscriptionTest(user);
+
+		// Test non-existing language
+		sttSubUser(user, 0, 0, "no-EXIST", true, true, "Azure Speech to Text does not support language \"no-EXIST\"",
 				false);

 		gracefullyLeaveParticipants(user, 1);
@ -2396,4 +2317,93 @@ public class OpenViduProTestAppE2eTest extends AbstractOpenViduTestappE2eTest {
 		commandLine.executeCommand(restartCommand, 30);
 	}

+	/**
+	 * Shared en-US Speech To Text check used by the different STT engine tests.
+	 * <p>
+	 * Registers a {@code speechToTextMessage} listener on the user's event manager,
+	 * subscribes to Speech To Text with language {@code en-US} through
+	 * {@code sttSubUser}, waits up to 80 seconds for 3 {@code recognized} events
+	 * and then compares the joined recognized text against the expected
+	 * transcription using the {@code Cosine} distance. Finally it unsubscribes
+	 * through {@code sttUnsubUser}.
+	 *
+	 * @param user the test application user whose event manager receives the
+	 *             Speech To Text events
+	 * @throws AssertionError if an event carries an unknown {@code reason}, if two
+	 *                        {@code recognized} events arrive in a row, if the wait
+	 *                        times out, or if the recognized text differs too much
+	 *                        from the expected one
+	 * @throws Exception      propagated from the subscribe/unsubscribe helpers or
+	 *                        from the interrupted wait
+	 */
+	private void commonEnUsTranscriptionTest(OpenViduTestappUser user) throws Exception {
+		List<String> expectedRecognitionList = List.of(
+				"for example we used to think that after childhood the brain did not really could not change and it turns out that nothing could be farther from the truth",
+				"another misconception about the brain is that you only use parts of it at any given time and silent when you do nothing",
+				"well this is also untrue it turns out that even when you are at rest and thinking of nothing your brain is highly active");
+
+		// NOTE(review): these lists are filled from the event listener and read by
+		// this thread after the latch is released; presumably the listener runs on
+		// another thread - confirm whether a synchronized collection is needed
+		List<String> recognizingSttEvents = new ArrayList<String>();
+		List<String> recognizedSttEvents = new ArrayList<String>();
+		final CountDownLatch latch = new CountDownLatch(3);
+
+		// Single-element arrays so the listener lambda can mutate this state
+		boolean[] previousSttEventWasRecognized = new boolean[1];
+		String[] previousSttRecognizedText = new String[1];
+		AssertionError[] exc = new AssertionError[1];
+
+		// Keeps only the first failure and releases the waiting thread immediately
+		// instead of letting it run into the timeout
+		Consumer<String> failFast = (message) -> {
+			if (exc[0] == null) {
+				exc[0] = new AssertionError(message);
+			}
+			while (latch.getCount() > 0) {
+				latch.countDown();
+			}
+		};
+
+		user.getEventManager().on("speechToTextMessage", (event) -> {
+			String reason = event.get("reason").getAsString();
+			String text = event.get("text").getAsString();
+			if ("recognizing".equals(reason)) {
+
+				previousSttEventWasRecognized[0] = false;
+				previousSttRecognizedText[0] = null;
+				recognizingSttEvents.add(text);
+
+			} else if ("recognized".equals(reason)) {
+
+				if (previousSttEventWasRecognized[0]) {
+					failFast.accept("Two recognized events in a row should never happen. Present event: "
+							+ event.get("text") + " | Previous event: \"" + previousSttRecognizedText[0] + "\"");
+				}
+				previousSttEventWasRecognized[0] = true;
+				previousSttRecognizedText[0] = text;
+				log.info("Recognized: {}", text);
+				recognizedSttEvents.add(text);
+				latch.countDown();
+
+			} else {
+
+				failFast.accept("Unknown SpeechToText event 'reason' property " + reason);
+
+			}
+		});
+
+		this.sttSubUser(user, 0, 0, "en-US", true, false);
+
+		if (!latch.await(80, TimeUnit.SECONDS)) {
+			Assertions.fail("Timeout waiting for recognized STT events");
+		}
+
+		// A failure recorded by the listener takes precedence over any later check
+		if (exc[0] != null) {
+			throw exc[0];
+		}
+
+		Assertions.assertTrue(recognizingSttEvents.size() > 0, "recognizing STT events should be greater than 0");
+		// The message now describes the condition that is actually being checked
+		Assertions.assertTrue(recognizingSttEvents.size() > recognizedSttEvents.size(),
+				"recognizing STT events should outnumber recognized STT events");
+
+		// The expected text may be in just 2 recognized events instead of 3
+		int expectedCharCount = expectedRecognitionList.stream().mapToInt(String::length).sum();
+		int recognizedCharCount = recognizedSttEvents.stream().mapToInt(String::length).sum();
+		int maxAllowedCountDifference = 50;
+		if (recognizedCharCount > (expectedCharCount + maxAllowedCountDifference)) {
+			recognizedSttEvents.remove(recognizedSttEvents.size() - 1);
+			log.info("Removed one element of recognized collection!");
+		}
+
+		// Normalize to lowercase letters and spaces, the same shape as the expected text
+		String finalRecognition = String.join(" ", recognizedSttEvents).toLowerCase().replaceAll("[^a-z ]", "");
+		String expectedRecognition = String.join(" ", expectedRecognitionList);
+
+		// Cosine similarity string comparison has been proven the most accurate one.
+		// The value computed here is a distance: the lower, the more alike
+		double cosineDistance = new Cosine().distance(finalRecognition, expectedRecognition);
+
+		log.info("Cosine similiarity: {}", cosineDistance);
+		log.info(expectedRecognition);
+		log.info(finalRecognition);
+		Assertions.assertTrue(cosineDistance < 0.1,
+				"Wrong similarity between actual and expected recognized text. Got " + cosineDistance);
+
+		sttUnsubUser(user, 0, 0, false, true);
+	}
+
 }