Get Selenium locators with an LLM and Java
Create Maven Project with pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the Selenium + LangChain4j locator demo. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.example</groupId>
    <artifactId>GenAI03</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <maven.compiler.source>23</maven.compiler.source>
        <maven.compiler.target>23</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>
    <dependencies>
        <!-- https://mvnrepository.com/artifact/org.seleniumhq.selenium/selenium-java -->
        <dependency>
            <groupId>org.seleniumhq.selenium</groupId>
            <artifactId>selenium-java</artifactId>
            <version>4.33.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.testng/testng -->
        <dependency>
            <groupId>org.testng</groupId>
            <artifactId>testng</artifactId>
            <version>7.11.0</version>
            <scope>test</scope>
        </dependency>
        <!-- Provider modules: the 1.0.1-beta6 artifacts belong to the LangChain4j 1.0.1 release train. -->
        <dependency>
            <groupId>dev.langchain4j</groupId>
            <artifactId>langchain4j-google-ai-gemini</artifactId>
            <version>1.0.1-beta6</version>
        </dependency>
        <dependency>
            <groupId>dev.langchain4j</groupId>
            <artifactId>langchain4j-ollama</artifactId>
            <version>1.0.1-beta6</version>
        </dependency>
        <!-- Keep the main artifact on the same release train as the provider modules above;
             mixing 1.0.0-beta3 with 1.0.1-beta6 puts incompatible LangChain4j versions on the classpath. -->
        <dependency>
            <groupId>dev.langchain4j</groupId>
            <artifactId>langchain4j</artifactId>
            <version>1.0.1</version>
        </dependency>
    </dependencies>
</project>
Create Test Class
package browserai;
import dev.langchain4j.model.googleai.GoogleAiGeminiChatModel;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demo that logs in to the-internet.herokuapp.com, asking Gemini to produce the
 * CSS selector for the username field from the live page source.
 */
public class Login {
    /** Prompt template; the {element} and {html} placeholders are substituted before sending. */
    public static String PROMPT = """
            You are a web automation expert. Your task is to generate a selenium locator for a given element based on the provided HTML.
            get the selenium locator for {element} with the following html
            ```
            {html}
            ```
            return with css selector, dont explain, just return the locator
            """;

    /** Captures the body of the first fenced code block; compiled once instead of per call. */
    private static final Pattern FENCED_BLOCK = Pattern.compile("(?s)```.*?\\n(.*?)\\n```");

    /** Placeholder used when no API key is supplied through the environment. */
    private static final String API_KEY_PLACEHOLDER = "<your-gemini-api-key>";

    /**
     * Asks Gemini for a CSS selector that matches the described element.
     *
     * @param element natural-language description of the element to locate
     * @param html    page source the model should inspect
     * @return the selector returned by the model, without code fences or surrounding whitespace;
     *         empty only when the model's reply is empty
     */
    public static String getLocatorByAI(String element, String html) {
        GoogleAiGeminiChatModel geminiChatModel = GoogleAiGeminiChatModel.builder()
                .apiKey(resolveApiKey())
                .modelName("gemini-2.0-flash")
                .build();
        String chatResponse = geminiChatModel.chat(PROMPT.replace("{element}", element).replace("{html}", html));
        return extractLocator(chatResponse);
    }

    /** Returns GEMINI_API_KEY from the environment, or the placeholder when it is not set. */
    private static String resolveApiKey() {
        String fromEnv = System.getenv("GEMINI_API_KEY");
        return (fromEnv == null || fromEnv.isBlank()) ? API_KEY_PLACEHOLDER : fromEnv;
    }

    /** Pulls the selector out of a model reply, whether or not it is wrapped in a fenced block. */
    private static String extractLocator(String chatResponse) {
        if (chatResponse == null) {
            return "";
        }
        Matcher matcher = FENCED_BLOCK.matcher(chatResponse);
        // The prompt asks for the bare locator, so an unfenced reply is the expected case:
        // fall back to the raw text instead of returning an empty selector.
        String locator = matcher.find() ? matcher.group(1) : chatResponse.replace("`", "");
        return locator.trim();
    }

    /**
     * Runs the login flow, locating the username field through the model.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        WebDriver driver = new ChromeDriver();
        try {
            driver.get("https://the-internet.herokuapp.com/login");
            String usernameLocator = getLocatorByAI("get username input field", driver.getPageSource());
            if (usernameLocator.isEmpty()) {
                throw new IllegalStateException("Model returned no locator for the username field");
            }
            driver.findElement(By.cssSelector(usernameLocator)).sendKeys("tomsmith");
            driver.findElement(By.id("password")).sendKeys("SuperSecretPassword!");
            driver.findElement(By.className("radius")).click();
        } finally {
            // Always close the browser, even when a lookup or the model call fails.
            driver.quit();
        }
    }
}
Using a local LLM with Ollama
/**
 * Asks a local Ollama model for a CSS selector matching the described element.
 *
 * @param html page source the model should inspect
 * @param task natural-language description of the element to locate
 * @return the model's answer with any {@code <think>} section, code fences, backticks and a
 *         leading "css" label removed
 */
public static String getLocatorByOllama(String html, String task) {
    ChatModel chatModel = OllamaChatModel.builder()
            .baseUrl("http://localhost:11434") // Ollama server URL
            .modelName("qwen2.5:3b") // Replace with your Ollama model name
            .logRequests(true)
            .build();
    String promptTemplate = """
            You are a expert selenium java automation engineer.
            Using the html below to return css locator for {task}
            ```
            {html}
            ```
            DO NOT USING Parent Child relationship.
            RETURN LOCATOR only DO NOT explain.
            """;
    String prompt = promptTemplate.replace("{task}", task).replace("{html}", html);
    String answer = chatModel.chat(prompt);
    System.out.println("[AI Response] " + answer);
    return answer
            // Reasoning models may prepend a <think>...</think> section.
            .replaceFirst("(?s)<think>.*?</think>", "")
            .trim()
            // Opening fence; a language tag is dropped only when it sits on its own line,
            // so a one-line reply such as ```input#id``` keeps its tag name.
            .replaceFirst("^```(?:[A-Za-z]*\\R)?", "")
            .replaceFirst("\\s*```$", "")
            .replace("`", "")
            .trim()
            // Strip only a leading "css" label; removing every "css" substring would
            // corrupt selectors such as ".css-input".
            .replaceFirst("^(?i:css)(?:\\s*[:=]\\s*|\\s+)", "")
            .trim();
}
Using AI when Selenium cannot find an element
/**
 * Finds an element with the given locator and, if that lookup fails, retries with a
 * CSS selector generated by the local model from the current page source.
 *
 * @param driver             active WebDriver session
 * @param locator            primary locator to try first
 * @param locatorDescription natural-language description handed to the model on fallback
 * @return the element found by the primary locator or by the generated selector
 */
public static WebElement getElement(WebDriver driver, By locator, String locatorDescription) {
    try {
        return driver.findElement(locator);
    } catch (Exception primaryFailure) {
        try {
            String html = driver.getPageSource();
            String cssLocator = getLocatorByOllama(html, locatorDescription);
            return driver.findElement(By.cssSelector(cssLocator));
        } catch (RuntimeException fallbackFailure) {
            // Keep the original lookup failure visible when the AI fallback fails as well.
            if (fallbackFailure != primaryFailure) {
                fallbackFailure.addSuppressed(primaryFailure);
            }
            throw fallbackFailure;
        }
    }
}
Using AI as the primary approach
/**
 * Locates an element purely through the model: the current page source and the task
 * description are sent to {@code getLocatorByAI}, and its answer is used as a CSS selector.
 *
 * @param driver active WebDriver session
 * @param task   natural-language description of the element to locate
 * @return the element matched by the generated selector
 */
public static WebElement getElementByAI(WebDriver driver, String task) {
    String cssLocator = getLocatorByAI(driver.getPageSource(), task);
    By selector = By.cssSelector(cssLocator);
    return driver.findElement(selector);
}
/**
 * Asks Gemini for a CSS selector matching the described element.
 *
 * @param html page source the model should inspect
 * @param task natural-language description of the element to locate
 * @return the model's answer with code fences, backticks and a leading "css" label removed
 */
public static String getLocatorByAI(String html, String task) {
    // Prefer a key from the environment; the placeholder remains as a fallback.
    String envKey = System.getenv("GEMINI_API_KEY");
    String apiKey = (envKey == null || envKey.isBlank()) ? "<>" : envKey;
    GoogleAiGeminiChatModel geminiChatModel = GoogleAiGeminiChatModel.builder()
            .apiKey(apiKey)
            .modelName("gemini-2.5-flash-preview-05-20")
            .build();
    String promptTemplate = """
            You are a expert selenium java automation engineer.
            Using the html below to return css locator for {task}
            ```
            {html}
            ```
            DO NOT USING Parent Child relationship.
            RETURN LOCATOR only DO NOT explain.
            """;
    String prompt = promptTemplate.replace("{task}", task).replace("{html}", html);
    String chatResponse = geminiChatModel.chat(prompt);
    return chatResponse
            .trim()
            // Opening fence; a language tag is dropped only when it sits on its own line,
            // otherwise a reply like "```css\n#id\n```" would yield the selector "css\n#id".
            .replaceFirst("^```(?:[A-Za-z]*\\R)?", "")
            .replaceFirst("\\s*```$", "")
            .replace("`", "")
            .trim()
            // Drop a leading "css" label without touching "css" inside the selector itself.
            .replaceFirst("^(?i:css)(?:\\s*[:=]\\s*|\\s+)", "")
            .trim();
}
Generate data in a specified format
/**
 * Asks the local Ollama model for random contact-form data and constrains the reply
 * to a JSON object with five required string fields, then prints the raw response.
 */
@Test
void generateSpecificFormat() {
    // Every field is free text (an email address, a UK-formatted phone number, long
    // subject/description strings), so each one is declared as a string property.
    ResponseFormat responseFormat = ResponseFormat.builder()
            .type(ResponseFormatType.JSON)
            .jsonSchema(JsonSchema.builder()
                    .name("ContactFormDetails")
                    .rootElement(JsonObjectSchema.builder()
                            .addStringProperty("name")
                            .addStringProperty("email")
                            .addStringProperty("phone")
                            .addStringProperty("subject")
                            .addStringProperty("description")
                            .required("name", "email", "phone", "subject", "description")
                            .build())
                    .build())
            .build();
    OllamaChatModel chatModel = OllamaChatModel.builder()
            .baseUrl("http://localhost:11434") // Ollama server URL
            .modelName("qwen2.5:3b") // Replace with your Ollama model name
            .logRequests(true)
            .responseFormat(responseFormat)
            .build();
    String prompt = """
            You are a data generator. Create me random data in a
            JSON format based on the criteria delimited by three hashes.
            Additional data requirements are shared between back ticks.
            ###
            name
            email
            phone `UK format`
            subject `Over 20 characters in length`
            description `Over 50 characters in length`
            ###
            """;
    String response = chatModel.chat(prompt);
    System.out.println(response);
}
Last updated