<?php
/*
Title: Kandi 1.0 PHP Web-Crawler
Author: K0NxT3D
Files: index.php, style.css
File: index.php
Url: http://kandi.seaverns.com/
Description: Kandi 1.0.1 Php Web Crawler.
*/
/**
 * Fetch the body of a URL with cURL.
 *
 * Follows up to 10 redirects, aborts after 30 seconds and sends the crawler's
 * User-Agent. Only http:// and https:// are permitted, both for the initial
 * request and for redirect targets, so a submitted or crawled URL cannot make
 * cURL read local files or speak other protocols.
 *
 * @param string $url Absolute URL to fetch.
 * @return array ['success' => true, 'data' => string] when the request
 *               completed with an HTTP status below 400, otherwise
 *               ['success' => false, 'message' => string].
 */
function get_html_content($url) {
    $ch = curl_init();
    if ($ch === false) {
        return ['success' => false, 'message' => 'cURL Error: unable to initialise a cURL session'];
    }

    curl_setopt_array($ch, [
        CURLOPT_URL             => $url,
        CURLOPT_RETURNTRANSFER  => true,
        CURLOPT_FOLLOWLOCATION  => true, // Follow redirects
        CURLOPT_MAXREDIRS       => 10,   // Max redirects to follow
        CURLOPT_TIMEOUT         => 30,   // Timeout after 30 seconds
        CURLOPT_USERAGENT       => 'Kandi 1.0 PHP Web-Crawler',
        // Never let a URL (or a redirect) switch to file://, gopher://, etc.
        CURLOPT_PROTOCOLS       => CURLPROTO_HTTP | CURLPROTO_HTTPS,
        CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
    ]);

    // Collect everything needed from the handle first so it is closed in
    // exactly one place, whichever outcome is reported.
    $data = curl_exec($ch);
    $curl_error = curl_error($ch);
    $http_code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    if ($data === false) {
        return ['success' => false, 'message' => 'cURL Error: ' . $curl_error];
    }

    if ($http_code >= 400) {
        return ['success' => false, 'message' => "HTTP Error: $http_code for URL $url"];
    }

    return ['success' => true, 'data' => $data];
}

/**
 * Extract the unique, valid link targets of every <a href> in an HTML document.
 *
 * Each href is resolved against $base_url with resolve_url(); results that do
 * not pass FILTER_VALIDATE_URL are dropped. Links are returned in document
 * order, first occurrence only.
 *
 * @param string $html     Raw HTML markup (may be empty or malformed).
 * @param string $base_url URL the document was fetched from.
 * @return array List of absolute URL strings.
 */
function get_links($html, $base_url) {
    $links = [];

    // DOMDocument::loadHTML() throws a ValueError on an empty string in PHP 8
    // (the @ operator cannot suppress that), so bail out before parsing.
    if (!is_string($html) || $html === '') {
        return $links;
    }

    // Collect libxml parse warnings internally instead of silencing with @,
    // then restore whatever error mode the caller had.
    $previous_mode = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $loaded = $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_mode);

    if ($loaded === false) {
        return $links;
    }

    $xpath = new DOMXPath($dom);
    $elements = $xpath->query('//a[@href]');
    if ($elements === false) {
        return $links;
    }

    $seen = []; // URL => true, for O(1) duplicate detection
    foreach ($elements as $element) {
        $full_url = resolve_url($element->getAttribute('href'), $base_url);
        if (isset($seen[$full_url]) || !filter_var($full_url, FILTER_VALIDATE_URL)) {
            continue;
        }
        $seen[$full_url] = true;
        $links[] = $full_url;
    }

    return $links;
}

/**
 * Resolve a link reference against the URL of the page it was found on.
 *
 * Handles absolute URLs (returned unchanged), protocol-relative URLs
 * ("//host/path"), root-relative paths ("/path"), document-relative paths
 * ("page.html", "../up.html"), query-only ("?p=2") and fragment-only ("#top")
 * references. "." and ".." path segments are collapsed.
 *
 * @param string $relative_url The reference as written in the href attribute.
 * @param string $base_url     Absolute URL of the containing document.
 * @return string The resolved URL. If $base_url has no scheme/host to resolve
 *                against, the two strings are simply joined with a single "/".
 */
function resolve_url($relative_url, $base_url) {
    $relative_url = trim((string) $relative_url);

    // Handle absolute URLs
    $scheme = parse_url($relative_url, PHP_URL_SCHEME);
    if (is_string($scheme) && $scheme !== '') {
        return $relative_url;
    }

    // Handle protocol-relative URLs
    if (strpos($relative_url, '//') === 0) {
        return parse_url($base_url, PHP_URL_SCHEME) . ':' . $relative_url;
    }

    $base = parse_url($base_url);
    if (!is_array($base) || empty($base['scheme']) || empty($base['host'])) {
        // Nothing usable to resolve against: fall back to a plain join.
        return rtrim($base_url, '/') . '/' . ltrim($relative_url, '/');
    }

    // scheme://[user[:pass]@]host[:port]
    $authority = $base['scheme'] . '://';
    if (isset($base['user'])) {
        $authority .= $base['user'];
        if (isset($base['pass'])) {
            $authority .= ':' . $base['pass'];
        }
        $authority .= '@';
    }
    $authority .= $base['host'];
    if (isset($base['port'])) {
        $authority .= ':' . $base['port'];
    }

    $base_path = isset($base['path']) ? $base['path'] : '/';
    if ($base_path === '' || $base_path[0] !== '/') {
        $base_path = '/' . $base_path;
    }
    $base_query = isset($base['query']) ? '?' . $base['query'] : '';

    // Empty, fragment-only and query-only references keep the base path.
    if ($relative_url === '') {
        return $authority . $base_path . $base_query;
    }
    if ($relative_url[0] === '#') {
        return $authority . $base_path . $base_query . $relative_url;
    }
    if ($relative_url[0] === '?') {
        return $authority . $base_path . $relative_url;
    }

    // Split the reference into its path and the untouched "?query#fragment" tail.
    $path_length = strcspn($relative_url, '?#');
    $path = substr($relative_url, 0, $path_length);
    $suffix = (string) substr($relative_url, $path_length);

    // Document-relative paths are relative to the base's directory, i.e.
    // everything up to and including the last "/" of the base path.
    if ($path[0] !== '/') {
        $path = substr($base_path, 0, strrpos($base_path, '/') + 1) . $path;
    }

    // Collapse "." and ".." segments; never climb above the root, and keep a
    // trailing slash when the path ends in a dot segment.
    $segments = explode('/', $path);
    $last_index = count($segments) - 1;
    $resolved = [];
    foreach ($segments as $index => $segment) {
        if ($segment === '.' || $segment === '..') {
            if ($segment === '..' && count($resolved) > 1) {
                array_pop($resolved);
            }
            if ($index === $last_index) {
                $resolved[] = '';
            }
            continue;
        }
        $resolved[] = $segment;
    }

    return $authority . implode('/', $resolved) . $suffix;
}

/**
 * Breadth-first crawl starting at $start_url.
 *
 * Despite its name, $max_depth is a cap on the number of pages fetched, not a
 * link depth. Every fetch attempt counts toward the cap, whether it succeeds
 * or fails, so a page full of dead links cannot keep the crawl running far
 * beyond the limit.
 *
 * @param string $start_url Absolute URL to begin crawling from.
 * @param int    $max_depth Maximum number of URLs to fetch.
 * @return array List of ['url' => string, 'message' => string] entries, one per
 *               fetched URL, in fetch order. 'message' is either
 *               'Crawled successfully' or the error reported by get_html_content().
 */
function crawl_website($start_url, $max_depth = 20) { // Set crawl depth
    $to_visit = [$start_url];
    // Every URL that has ever been queued (and therefore visited or pending),
    // keyed by URL for O(1) duplicate checks.
    $seen = [$start_url => true];
    $results = [];
    $attempts = 0;

    while (!empty($to_visit) && $attempts < $max_depth) {
        $current_url = array_shift($to_visit);
        $attempts++;

        $response = get_html_content($current_url);
        if (!$response['success']) {
            $results[] = ['url' => $current_url, 'message' => $response['message']];
            continue; // Skip if unable to get content
        }

        foreach (get_links($response['data'], $current_url) as $link) {
            if (!isset($seen[$link])) {
                $seen[$link] = true;
                $to_visit[] = $link;
            }
        }

        $results[] = ['url' => $current_url, 'message' => 'Crawled successfully'];
    }

    return $results;
}

// Handle form submission
//
// Template variables, pre-initialised so the markup below can always read them.
$pages = [];            // crawl results: list of ['url' => ..., 'message' => ...]
$messages = [];         // status lines shown above the results (escaped on output)
$start_time = null;     // microtime(true) when the crawl began, null if no crawl ran
$end_time = null;       // microtime(true) when the crawl finished
$elapsed_time = '';
$page_count = 0;
$results_message = '';

$request_method = isset($_SERVER['REQUEST_METHOD']) ? $_SERVER['REQUEST_METHOD'] : '';

if ($request_method === 'POST' && !empty($_POST['start_url'])) {
    // Keep the URL raw: HTML-escaping belongs at output time. Escaping here
    // would turn "&" into "&amp;" in the URL that is actually fetched.
    // A non-string value (e.g. start_url[]=x) is treated as invalid input.
    $start_url = is_string($_POST['start_url']) ? trim($_POST['start_url']) : '';

    // FILTER_VALIDATE_URL also accepts other schemes (for example file: or
    // mailto:), so additionally require a web scheme before crawling.
    $scheme = strtolower((string) parse_url($start_url, PHP_URL_SCHEME));
    $is_web_url = ($scheme === 'http' || $scheme === 'https');

    if ($is_web_url && filter_var($start_url, FILTER_VALIDATE_URL)) {
        $start_time = microtime(true); // Start time
        $messages[] = "Crawling started for URL: " . $start_url;
        $pages = crawl_website($start_url);
        $end_time = microtime(true); // End time

        // Calculate elapsed time
        $elapsed_time = $end_time - $start_time;
        $elapsed_time_formatted = number_format($elapsed_time, 2);

        // Count pages
        $page_count = count($pages);
        $results_message = "Crawled: $page_count Pages | Elapsed Time: $elapsed_time_formatted seconds";
    } else {
        $messages[] = "Invalid URL provided. Please enter a valid URL.";
    }
}

?>
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <link rel="stylesheet" type="text/css" href="style.css" />
    <link rel="preconnect" href="https://fonts.googleapis.com">
    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
    <link href="https://fonts.googleapis.com/css2?family=Oswald&display=swap" rel="stylesheet">
    <title>Kandi 1.0 PHP Web-Crawler</title>
</head>
<body>
  <h1>Kandi 1.0 PHP Web-Crawler</h1>
    <form method="POST">
     <label for="start_url">Enter the URL of the website to crawl:</label><br>
     <textarea id="start_url" name="start_url" rows="1" cols="36"><?php /* Re-populate with the submitted value; ignore non-string input such as start_url[]=x. */ echo (isset($_POST['start_url']) && is_string($_POST['start_url'])) ? htmlspecialchars($_POST['start_url']) : ''; ?></textarea><br>
     <input id="crawl" type="submit" value="Crawl Website">
    </form>
    <?php /* Status messages: a message containing "Invalid" gets the error class. */ if (!empty($messages)): ?>
  <div>
  <?php foreach ($messages as $message): ?>
     <p class="<?php echo strpos($message, 'Invalid') !== false ? 'error' : 'success'; ?>"><?php echo htmlspecialchars($message); ?></p>
    <?php endforeach; ?>
  </div>
    <?php endif; ?>
   <?php /* Per-URL crawl results, one list item per fetched URL. */ if (!empty($pages)): ?>
 <!-- Begin Display Kandi Results -->
        <h2>Results:</h2>
         <ul>
    <?php foreach ($pages as $page): ?>
    <?php /* Entries with a message show "url - message"; a message containing "Error" gets the error class. */ if (isset($page['message'])): ?>
  <li><?php echo htmlspecialchars($page['url']); ?> - <span class="<?php echo strpos($page['message'], 'Error') !== false ? 'error' : 'success'; ?>"><?php echo htmlspecialchars($page['message']); ?></span></li>
<?php /* Fallback for entries without a message: render the URL as a link. */ else: ?>
<li><a href="<?php echo htmlspecialchars($page['url']); ?>" target="_blank" rel="noopener noreferrer"><?php echo htmlspecialchars($page['url']); ?></a></li>
                <?php endif; ?>
    <?php endforeach; ?>
     </ul>
        <!-- End Display Kandi Results -->
<?php endif; ?>
<?php /* Both timestamps are only set when a crawl actually ran. */ if ($start_time && $end_time): ?>
    <p><?php echo $results_message; ?></p>
<?php endif; ?>
</body>
</html>
