// Compile with: javac -cp "jsoup-1.16.1.jar" WebCrawler.java
// ---- File: Main.java ----
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Connects to http://example.com, prints the page title, echoes each
 * hyperlink as it is found, then prints the collected list of URLs.
 */
public class Main {

    public static void main(String[] args) {
        try {
            // Fetch and parse the HTML document.
            Document document = Jsoup.connect("http://example.com").get();

            System.out.println("Title: " + document.title());

            // Select every anchor element that carries an href attribute.
            Elements links = document.select("a[href]");

            // Program to the List interface; presize since the count is known.
            List<String> linkList = new ArrayList<>(links.size());
            for (Element link : links) {
                String url = link.attr("href");
                System.out.println("Link: " + url);
                linkList.add(url);
            }

            System.out.println("Retrieved URLs:");
            for (String link : linkList) {
                System.out.println(link);
            }
        } catch (IOException e) {
            // Network or parse failure: report with context, keep the trace.
            System.err.println("Failed to fetch http://example.com: " + e.getMessage());
            e.printStackTrace();
        }
    }
}
// ---- File: WebPageFetcher.java ----
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
/**
 * Fetches a web page with jsoup and prints its title and hyperlinks.
 */
public class WebPageFetcher {

    /**
     * Fetches the page at {@code url}, then prints its title and every
     * hyperlink (anchor {@code href}) found on it. Failures are reported
     * to stderr with the offending URL; the method never throws.
     *
     * @param url absolute URL of the page to fetch, e.g. "http://example.com"
     */
    public static void fetchAndPrintPageContent(String url) {
        try {
            // Fetch and parse the HTML document for the provided URL.
            Document document = Jsoup.connect(url).get();

            System.out.println("Title: " + document.title());

            // Select every anchor element that carries an href attribute.
            Elements links = document.select("a[href]");
            System.out.println("Links on the page:");
            for (Element link : links) {
                System.out.println(link.attr("href"));
            }
            // Extend here to extract text or other elements if required.
        } catch (IOException e) {
            // Include the URL so the failure is actionable; keep the trace.
            System.err.println("Failed to fetch " + url + ": " + e.getMessage());
            e.printStackTrace();
        }
    }
}
// ---- File: Main.java (second version; truncated below) ----
import java.util.ArrayList;
public class Main {