<?php
/**
 * Plugin Name: Massblogger LLM.txt
 * Plugin URI: https://massblogger.com/llm-wordpress-plugin
 * Description: Adds an llm.txt file to your WordPress site to help AI language models understand your content. Editable from Settings → LLM.txt.
 * Version: 1.1.0
 * Author: Massblogger
 * Author URI: https://massblogger.com
 * License: GPL v2 or later
 * License URI: https://www.gnu.org/licenses/gpl-2.0.html
 * Text Domain: massblogger-llm-txt
 * Requires at least: 5.0
 * Tested up to: 6.9
 * Requires PHP: 7.4
 */

// Refuse to run when the file is requested directly instead of being loaded by WordPress.
defined( 'ABSPATH' ) || exit;

// Plugin version; keep in sync with the "Version" field of the plugin header.
define( 'MASSBLOGGER_LLM_TXT_VERSION', '1.1.0' );

/**
 * Build the auto-generated llm.txt document from the site's own data.
 *
 * The document is Markdown-flavoured plain text made of: a header with the
 * site name, tagline and URL; a fixed "About" and "Crawling Policy" section;
 * a "Content Structure" section listing up to 20 published posts, up to 20
 * pages (ordered by menu order) and every non-empty category; and a closing
 * "Contact" line. Sub-sections with no entries are omitted.
 *
 * @return string The generated llm.txt content.
 */
function massblogger_llm_txt_default_content() {
    // WordPress saves the site title and tagline HTML-entity-encoded
    // (e.g. "Tom &amp; Jerry"). This document is plain text, so decode them
    // back to the characters the site owner actually typed.
    $site_name        = wp_specialchars_decode( get_bloginfo( 'name' ), ENT_QUOTES );
    $site_description = wp_specialchars_decode( get_bloginfo( 'description' ), ENT_QUOTES );
    $site_url         = home_url( '/' );

    $content  = "# {$site_name}\n\n";
    $content .= "> {$site_description}\n\n";
    $content .= "Website: {$site_url}\n\n";
    $content .= "## About\n\n";
    $content .= "{$site_name} is a website that welcomes AI language models to read and reference its content.\n\n";
    $content .= "## Crawling Policy\n\n";
    $content .= "AI language models and crawlers are welcome to access, read, and learn from all publicly available content on this site. You may:\n\n";
    $content .= "- Crawl and index all public pages\n";
    $content .= "- Use content for training and reference\n";
    $content .= "- Summarize and cite content from this site\n";
    $content .= "- Include this site's information in AI-generated responses\n\n";
    $content .= "## Content Structure\n\n";

    // Recent articles: up to 20 published posts.
    $recent_posts = get_posts(
        array(
            'numberposts' => 20,
            'post_status' => 'publish',
        )
    );

    if ( ! empty( $recent_posts ) ) {
        $content .= "### Recent Articles\n\n";
        foreach ( $recent_posts as $article ) {
            $permalink = get_permalink( $article->ID );
            $content  .= "- [{$article->post_title}]({$permalink})\n";
        }
        $content .= "\n";
    }

    // Pages: up to 20, in menu order.
    $pages = get_pages(
        array(
            'sort_column' => 'menu_order',
            'number'      => 20,
        )
    );

    if ( ! empty( $pages ) ) {
        $content .= "### Pages\n\n";
        foreach ( $pages as $page ) {
            $permalink = get_permalink( $page->ID );
            $content  .= "- [{$page->post_title}]({$permalink})\n";
        }
        $content .= "\n";
    }

    // Categories: only those that contain at least one post.
    $categories = get_categories( array( 'hide_empty' => true ) );

    if ( ! empty( $categories ) ) {
        $content .= "### Categories\n\n";
        foreach ( $categories as $category ) {
            $category_link = get_category_link( $category->term_id );
            $content      .= "- [{$category->name}]({$category_link}) ({$category->count} articles)\n";
        }
        $content .= "\n";
    }

    $content .= "## Contact\n\n";
    $content .= "For questions about this site's content, visit: {$site_url}\n";

    return $content;
}

/**
 * Map the pretty URL /llm.txt onto an internal query variable.
 *
 * The rule is added at the top of the rewrite list so it is evaluated before
 * the rules registered at the bottom. Runs on every `init`.
 *
 * @return void
 */
function massblogger_llm_txt_rewrite_rules() {
    $pattern = '^llm\.txt$';
    $target  = 'index.php?massblogger_llm_txt=1';

    add_rewrite_rule( $pattern, $target, 'top' );
}
add_action( 'init', 'massblogger_llm_txt_rewrite_rules' );

/**
 * Whitelist the plugin's query variable so WordPress keeps it when parsing a request.
 *
 * @param array $vars Query variable names received from the `query_vars` filter.
 * @return array The same list with `massblogger_llm_txt` appended.
 */
function massblogger_llm_txt_query_vars( $vars ) {
    $plugin_query_var = 'massblogger_llm_txt';
    $vars[]           = $plugin_query_var;

    return $vars;
}
add_filter( 'query_vars', 'massblogger_llm_txt_query_vars' );

/**
 * Serve the llm.txt document when the request carries the plugin's query var.
 *
 * Uses the content saved in the `massblogger_llm_txt_content` option, or the
 * auto-generated default when that option is empty. The response is sent as
 * UTF-8 plain text, marked `noindex`, and the request ends here. Requests
 * without the query var are left untouched.
 *
 * @return void
 */
function massblogger_llm_txt_template_redirect() {
    if ( ! get_query_var( 'massblogger_llm_txt' ) ) {
        return;
    }

    $content = get_option( 'massblogger_llm_txt_content', '' );
    if ( empty( $content ) ) {
        $content = massblogger_llm_txt_default_content();
    }

    header( 'Content-Type: text/plain; charset=utf-8' );
    header( 'X-Robots-Tag: noindex' );
    // Tell browsers to honour the declared text/plain type instead of sniffing
    // the body, so markup inside the document is never rendered as HTML.
    header( 'X-Content-Type-Options: nosniff' );

    // The body is plain text, not HTML: entity-encoding it would corrupt the
    // document (Markdown ">" would become "&gt;", "'" would become "&#039;").
    // phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped -- text/plain response.
    echo $content;
    exit;
}
add_action( 'template_redirect', 'massblogger_llm_txt_template_redirect' );

/**
 * Activation callback: make the /llm.txt URL work immediately.
 *
 * The rule is registered explicitly before the flush so that it is part of
 * the regenerated rule set.
 *
 * @return void
 */
function massblogger_llm_txt_activate() {
    // Step 1: register the llm.txt rule for this request.
    massblogger_llm_txt_rewrite_rules();

    // Step 2: rebuild the stored rewrite rules (hard flush, the function's default).
    flush_rewrite_rules( true );
}
register_activation_hook( __FILE__, 'massblogger_llm_txt_activate' );

/**
 * Deactivation callback: remove the llm.txt rewrite rule and rebuild the rule set.
 *
 * @return void
 */
function massblogger_llm_txt_deactivate() {
    global $wp_rewrite;

    // The plugin's `init` callback registers the llm.txt rule on every request,
    // so it is normally still held in memory when deactivation runs. Drop it
    // first; otherwise the flush below would write it straight back into the
    // stored rules and /llm.txt would keep resolving after deactivation.
    if ( $wp_rewrite instanceof WP_Rewrite && isset( $wp_rewrite->extra_rules_top['^llm\.txt$'] ) ) {
        unset( $wp_rewrite->extra_rules_top['^llm\.txt$'] );
    }

    flush_rewrite_rules();
}
register_deactivation_hook( __FILE__, 'massblogger_llm_txt_deactivate' );

/**
 * Uninstall callback: remove the saved llm.txt content from the options table.
 *
 * @return void
 */
function massblogger_llm_txt_uninstall() {
    $option_name = 'massblogger_llm_txt_content';

    delete_option( $option_name );
}
register_uninstall_hook( __FILE__, 'massblogger_llm_txt_uninstall' );

// =====================================================
// SETTINGS LINK ON PLUGINS PAGE
// =====================================================

/**
 * Prepend a "Settings" link to this plugin's row on the Plugins screen.
 *
 * @param array $links Existing action links (HTML strings) for the plugin row.
 * @return array The links with the settings link placed first.
 */
function massblogger_llm_txt_action_links( $links ) {
    // admin_url() output can be altered by filters, so escape it for the href attribute.
    $settings_url  = esc_url( admin_url( 'options-general.php?page=massblogger-llm-txt' ) );
    $settings_link = '<a href="' . $settings_url . '">Settings</a>';

    array_unshift( $links, $settings_link );

    return $links;
}
add_filter( 'plugin_action_links_' . plugin_basename( __FILE__ ), 'massblogger_llm_txt_action_links' );

// =====================================================
// ROBOTS.TXT CHECK
// =====================================================

/**
 * List the AI crawler user-agent tokens this plugin looks for in robots.txt.
 *
 * @return array Map of user-agent token => human-readable product label.
 */
function massblogger_llm_txt_ai_bots() {
    $bots = array(
        // OpenAI.
        'GPTBot'             => 'ChatGPT / OpenAI',
        'ChatGPT-User'       => 'ChatGPT Browse',
        'OAI-SearchBot'      => 'OpenAI Search',
        // Common Crawl.
        'CCBot'              => 'Common Crawl (AI training)',
        // Anthropic.
        'anthropic-ai'       => 'Claude / Anthropic',
        'ClaudeBot'          => 'Claude / Anthropic',
        'Claude-Web'         => 'Claude Browse',
        // Google.
        'Google-Extended'    => 'Gemini / Google AI',
        // Others.
        'Bytespider'         => 'ByteDance AI',
        'PerplexityBot'      => 'Perplexity AI',
        'Cohere-ai'          => 'Cohere AI',
        // Meta.
        'FacebookBot'        => 'Meta AI',
        'Meta-ExternalAgent' => 'Meta AI Training',
    );

    return $bots;
}

/**
 * Split robots.txt text into per-user-agent rule summaries.
 *
 * Consecutive `User-agent` lines share one group; a `User-agent` line that
 * follows an `Allow`/`Disallow` line starts a new group. Inline `#` comments
 * are stripped. Directives other than `User-agent`, `Allow` and `Disallow`
 * are ignored and do not affect grouping.
 *
 * @param string $robots_txt Raw robots.txt body.
 * @return array Map of lowercased user-agent token => array with boolean keys
 *               `disallow` (group contains `Disallow: /` or `Disallow: /*`) and
 *               `allow` (group contains `Allow: /` or `Allow: /*`).
 */
function massblogger_llm_txt_parse_robots_groups( $robots_txt ) {
    $groups         = array();
    $current_agents = array();
    $seen_rule      = false;

    foreach ( preg_split( '/\r\n|\r|\n/', (string) $robots_txt ) as $line ) {
        // Drop comments (whole-line or trailing) before looking at the directive.
        $hash_pos = strpos( $line, '#' );
        if ( false !== $hash_pos ) {
            $line = substr( $line, 0, $hash_pos );
        }

        $line = trim( $line );
        if ( '' === $line || ! preg_match( '/^([A-Za-z-]+)\s*:\s*(.*)$/', $line, $m ) ) {
            continue;
        }

        $field = strtolower( $m[1] );
        $value = trim( $m[2] );

        if ( 'user-agent' === $field ) {
            // A user-agent line after rule lines opens a new group; without this
            // reset, rules of a later group would be attributed to earlier agents.
            if ( $seen_rule ) {
                $current_agents = array();
                $seen_rule      = false;
            }

            if ( '' !== $value ) {
                $agent            = strtolower( $value );
                $current_agents[] = $agent;

                // Groups naming the same agent more than once are merged.
                if ( ! isset( $groups[ $agent ] ) ) {
                    $groups[ $agent ] = array(
                        'disallow' => false,
                        'allow'    => false,
                    );
                }
            }
            continue;
        }

        if ( 'allow' === $field || 'disallow' === $field ) {
            $seen_rule = true;

            // Only site-wide rules matter here; "/*" matches every path just like "/".
            if ( '/' === $value || '/*' === $value ) {
                foreach ( $current_agents as $agent ) {
                    $groups[ $agent ][ $field ] = true;
                }
            }
        }
    }

    return $groups;
}

/**
 * Check the site's robots.txt for AI bots that are blocked from the whole site.
 *
 * Fetches `/robots.txt` over HTTP. A bot is reported as blocked when the group
 * that applies to it contains a site-wide `Disallow` and no site-wide `Allow`.
 * The applicable group is the one naming the bot (case-insensitive exact
 * match); the `*` group is used only when the bot has no group of its own.
 * Path-specific rules are not evaluated.
 *
 * @return array Map of bot user-agent => label for blocked bots. Empty when
 *               nothing is blocked or when robots.txt could not be fetched.
 */
function massblogger_llm_txt_check_robots() {
    $robots_url = home_url( '/robots.txt' );

    // Certificate verification is disabled for this request to the site's own URL.
    // NOTE(review): presumably to tolerate self-signed certificates on loopback requests - confirm.
    $response = wp_remote_get(
        $robots_url,
        array(
            'timeout'   => 5,
            'sslverify' => false,
        )
    );

    if ( is_wp_error( $response ) || wp_remote_retrieve_response_code( $response ) !== 200 ) {
        return array(); // Can't fetch, assume OK.
    }

    $robots_txt = wp_remote_retrieve_body( $response );
    if ( empty( $robots_txt ) ) {
        return array();
    }

    $groups  = massblogger_llm_txt_parse_robots_groups( $robots_txt );
    $blocked = array();

    foreach ( massblogger_llm_txt_ai_bots() as $bot_agent => $bot_label ) {
        $bot_key = strtolower( $bot_agent );

        if ( isset( $groups[ $bot_key ] ) ) {
            $rules = $groups[ $bot_key ];
        } elseif ( isset( $groups['*'] ) ) {
            $rules = $groups['*'];
        } else {
            continue; // No group applies to this bot, so nothing blocks it.
        }

        // With both "Disallow: /" and "Allow: /" present, the bot is treated as allowed.
        if ( $rules['disallow'] && ! $rules['allow'] ) {
            $blocked[ $bot_agent ] = $bot_label;
        }
    }

    return $blocked;
}

/**
 * AJAX endpoint (logged-in users only) that runs the robots.txt check.
 *
 * Verifies the `massblogger_llm_txt_nonce` nonce sent in the `nonce` field and
 * requires the `manage_options` capability. Responds with JSON: on success,
 * `blocked` holds the blocked bots and `robots_url` the robots.txt address.
 *
 * @return void
 */
function massblogger_llm_txt_ajax_robots_check() {
    check_ajax_referer( 'massblogger_llm_txt_nonce', 'nonce' );

    $may_manage = current_user_can( 'manage_options' );
    if ( ! $may_manage ) {
        wp_send_json_error( 'Unauthorized' );
    }

    $payload = array(
        'blocked'    => massblogger_llm_txt_check_robots(),
        'robots_url' => home_url( '/robots.txt' ),
    );

    wp_send_json_success( $payload );
}
add_action( 'wp_ajax_massblogger_llm_txt_robots_check', 'massblogger_llm_txt_ajax_robots_check' );

// =====================================================
// ADMIN SETTINGS PAGE
// =====================================================

/**
 * Register the plugin's screen as a sub-page of the Settings menu.
 *
 * @return void
 */
function massblogger_llm_txt_admin_menu() {
    $page_title = 'LLM.txt Settings';
    $menu_title = 'LLM.txt';
    $capability = 'manage_options';
    $menu_slug  = 'massblogger-llm-txt';
    $renderer   = 'massblogger_llm_txt_settings_page';

    add_options_page( $page_title, $menu_title, $capability, $menu_slug, $renderer );
}
add_action( 'admin_menu', 'massblogger_llm_txt_admin_menu' );

/**
 * Register the option that stores the custom llm.txt content.
 *
 * The value is a string, is passed through `sanitize_textarea_field` when
 * saved, and defaults to an empty string.
 *
 * @return void
 */
function massblogger_llm_txt_register_settings() {
    $option_group = 'massblogger_llm_txt_settings';
    $option_name  = 'massblogger_llm_txt_content';

    $option_args = array(
        'type'              => 'string',
        'sanitize_callback' => 'sanitize_textarea_field',
        'default'           => '',
    );

    register_setting( $option_group, $option_name, $option_args );
}
add_action( 'admin_init', 'massblogger_llm_txt_register_settings' );

/**
 * AJAX endpoint (logged-in users only) that returns the auto-generated llm.txt content.
 *
 * Verifies the `massblogger_llm_txt_nonce` nonce sent in the `nonce` field and
 * requires the `manage_options` capability before responding with JSON.
 *
 * @return void
 */
function massblogger_llm_txt_ajax_default() {
    check_ajax_referer( 'massblogger_llm_txt_nonce', 'nonce' );

    $may_manage = current_user_can( 'manage_options' );
    if ( ! $may_manage ) {
        wp_send_json_error( 'Unauthorized' );
    }

    $default_content = massblogger_llm_txt_default_content();

    wp_send_json_success( $default_content );
}
add_action( 'wp_ajax_massblogger_llm_txt_get_default', 'massblogger_llm_txt_ajax_default' );

/**
 * Render the "LLM.txt Settings" admin screen.
 *
 * Outputs, in order: inline CSS for the screen, an intro banner, a robots.txt
 * status card (filled in by JavaScript through the
 * `massblogger_llm_txt_robots_check` AJAX action), a link to the public
 * llm.txt URL, the settings form (posted to options.php under the
 * `massblogger_llm_txt_settings` group), and the inline script that drives
 * the "Revert" link and the robots.txt check.
 *
 * The textarea shows the saved custom content when the option is non-empty,
 * otherwise the auto-generated default.
 *
 * NOTE(review): because the textarea is pre-filled with the generated default
 * when nothing is saved, pressing "Save" without editing submits that text as
 * the option value - confirm this is the intended behaviour.
 *
 * @return void
 */
function massblogger_llm_txt_settings_page() {
    // Saved custom content; an empty value means the auto-generated default is in use.
    $saved_content   = get_option( 'massblogger_llm_txt_content', '' );
    $is_custom       = ! empty( $saved_content );
    // Text placed in the textarea: custom content if present, otherwise the generated default.
    $display_content = $is_custom ? $saved_content : massblogger_llm_txt_default_content();
    // Public address of the llm.txt document, shown as a link above the form.
    $llm_txt_url     = home_url( '/llm.txt' );
    ?>
    <style>
        .llm-txt-header {
            background: #fff;
            border: 1px solid #c3c4c7;
            border-radius: 4px;
            padding: 16px 20px;
            margin: 16px 0;
            display: flex;
            align-items: center;
            gap: 14px;
        }
        .llm-txt-header-logo {
            font-size: 22px;
            font-weight: 700;
            color: #1d2327;
            white-space: nowrap;
        }
        .llm-txt-header-divider {
            width: 1px;
            height: 32px;
            background: #dcdcde;
        }
        .llm-txt-header-text {
            color: #50575e;
            font-size: 13px;
            line-height: 1.5;
        }
        .llm-txt-header-text a {
            color: #2271b1;
            text-decoration: none;
        }
        .llm-txt-header-text a:hover {
            text-decoration: underline;
        }
        .llm-txt-robots-card {
            background: #fff;
            border: 1px solid #c3c4c7;
            border-radius: 4px;
            padding: 16px 20px;
            margin: 16px 0 24px;
        }
        .llm-txt-robots-card h3 {
            margin: 0 0 8px;
            font-size: 14px;
        }
        .llm-txt-robots-ok {
            border-left: 4px solid #00a32a;
        }
        .llm-txt-robots-warn {
            border-left: 4px solid #dba617;
        }
        .llm-txt-robots-list {
            margin: 10px 0 0;
            padding: 0;
            list-style: none;
        }
        .llm-txt-robots-list li {
            padding: 4px 0;
            font-size: 13px;
            color: #50575e;
        }
        .llm-txt-robots-list li .dashicons {
            color: #d63638;
            font-size: 16px;
            width: 16px;
            height: 16px;
            margin-right: 6px;
            vertical-align: text-bottom;
        }
    </style>

    <div class="wrap">
        <h1>LLM.txt Settings</h1>

        <!-- Massblogger intro -->
        <div class="llm-txt-header">
            <div class="llm-txt-header-logo">&#x1F44B; Massblogger</div>
            <div class="llm-txt-header-divider"></div>
            <div class="llm-txt-header-text">
                A free plugin by <a href="https://massblogger.com" target="_blank">Massblogger</a> &mdash; manage all your WordPress sites from one dashboard. Create and publish content with or without AI across multiple websites.
            </div>
        </div>

        <!-- Robots.txt check — loaded via AJAX to avoid blocking page render -->
        <div class="llm-txt-robots-card" id="robots-check-card" style="border-left: 4px solid #dcdcde;">
            <h3>&#x1F916; Robots.txt AI Bot Check</h3>
            <p style="color: #50575e; margin: 0;" id="robots-check-loading">
                <span class="spinner is-active" style="float: none; margin: 0 6px 0 0; vertical-align: text-bottom;"></span>
                Checking your robots.txt&hellip;
            </p>
            <div id="robots-check-result" style="display: none;"></div>
            <p style="margin: 10px 0 0; display: none;" id="robots-recheck-wrap">
                <button type="button" class="button button-small" id="llm-txt-recheck-robots">Re-check robots.txt</button>
            </p>
        </div>

        <p>
            Configure the <code>llm.txt</code> file for your site. This file helps AI language models understand your website&rsquo;s content and crawling policies.
        </p>
        <p>
            <strong>Your llm.txt URL:</strong>
            <a href="<?php echo esc_url( $llm_txt_url ); ?>" target="_blank"><?php echo esc_html( $llm_txt_url ); ?></a>
        </p>

        <form method="post" action="options.php" id="llm-txt-form">
            <?php settings_fields( 'massblogger_llm_txt_settings' ); ?>

            <table class="form-table">
                <tr>
                    <th scope="row">
                        <label for="massblogger_llm_txt_content">LLM.txt Content</label>
                    </th>
                    <td>
                        <p class="description" style="margin-bottom: 8px;">
                            <?php if ( $is_custom ) : ?>
                                <span style="color: #d63638;">&#9679;</span> Using custom content.
                                <a href="#" id="llm-txt-revert" style="margin-left: 4px;">Revert to auto-generated default</a>
                            <?php else : ?>
                                <span style="color: #00a32a;">&#9679;</span> Using auto-generated default content (based on your site&rsquo;s posts, pages, and categories).
                            <?php endif; ?>
                        </p>
                        <textarea
                            name="massblogger_llm_txt_content"
                            id="massblogger_llm_txt_content"
                            rows="25"
                            cols="80"
                            class="large-text code"
                            style="font-family: monospace; font-size: 13px; line-height: 1.6;"
                        ><?php echo esc_textarea( $display_content ); ?></textarea>
                        <p class="description">
                            Edit the content above to customize what AI models see. Leave empty or click &ldquo;Revert&rdquo; to use the auto-generated default.
                        </p>
                    </td>
                </tr>
            </table>

            <?php submit_button( 'Save LLM.txt' ); ?>
        </form>
    </div>

    <script>
    jQuery(document).ready(function($) {
        var nonce = <?php echo wp_json_encode( wp_create_nonce( 'massblogger_llm_txt_nonce' ) ); ?>;

        // Revert to default
        $('#llm-txt-revert').on('click', function(e) {
            e.preventDefault();
            if (!confirm('Revert to auto-generated default content? Your custom content will be removed.')) {
                return;
            }

            $.post(ajaxurl, {
                action: 'massblogger_llm_txt_get_default',
                nonce: nonce
            }, function(response) {
                if (response.success) {
                    $('#massblogger_llm_txt_content').val('');
                    $('#llm-txt-form').submit();
                }
            });
        });

        // Render robots.txt check result
        function renderRobotsResult(data) {
            var $card   = $('#robots-check-card');
            var $result = $('#robots-check-result');
            var blocked = data.blocked;
            var count   = Object.keys(blocked).length;

            $('#robots-check-loading').hide();
            $result.show();
            $('#robots-recheck-wrap').show();

            if (count === 0) {
                $card.css('border-left-color', '#00a32a');
                $result.html(
                    '<p style="color: #00a32a; margin: 0;">' +
                    '<span class="dashicons dashicons-yes-alt" style="font-size:16px;width:16px;height:16px;vertical-align:text-bottom;"></span> ' +
                    'Your <a href="' + data.robots_url + '" target="_blank">robots.txt</a> is not blocking any known AI bots. AI models can crawl your content.</p>'
                );
            } else {
                $card.css('border-left-color', '#dba617');
                var html = '<p style="color:#996800;margin:0 0 6px;">' +
                    '<span class="dashicons dashicons-warning" style="font-size:16px;width:16px;height:16px;vertical-align:text-bottom;"></span> ' +
                    'Your <a href="' + data.robots_url + '" target="_blank">robots.txt</a> is blocking ' + count + ' AI bot(s). These bots cannot crawl your site, which limits the benefit of llm.txt.</p>' +
                    '<ul class="llm-txt-robots-list">';
                $.each(blocked, function(agent, label) {
                    html += '<li><span class="dashicons dashicons-no" style="color:#d63638;font-size:16px;width:16px;height:16px;margin-right:6px;vertical-align:text-bottom;"></span> <strong>' + agent + '</strong> &mdash; ' + label + '</li>';
                });
                html += '</ul><p style="margin:10px 0 0;font-size:13px;color:#50575e;">To allow these bots, remove their <code>User-agent</code> / <code>Disallow</code> rules from your robots.txt. If a plugin manages your robots.txt (e.g. Yoast, Rank Math), check its settings.</p>';
                $result.html(html);
            }
        }

        // Run robots.txt check via AJAX (on page load and on re-check click)
        function runRobotsCheck() {
            $('#robots-check-loading').show();
            $('#robots-check-result').hide();
            $('#robots-recheck-wrap').hide();
            $('#robots-check-card').css('border-left-color', '#dcdcde');

            $.post(ajaxurl, {
                action: 'massblogger_llm_txt_robots_check',
                nonce: nonce
            }, function(response) {
                if (response.success) {
                    renderRobotsResult(response.data);
                } else {
                    $('#robots-check-loading').hide();
                    $('#robots-check-result').html('<p style="color:#50575e;margin:0;">Could not check robots.txt.</p>').show();
                    $('#robots-recheck-wrap').show();
                }
            }).fail(function() {
                $('#robots-check-loading').hide();
                $('#robots-check-result').html('<p style="color:#50575e;margin:0;">Could not check robots.txt.</p>').show();
                $('#robots-recheck-wrap').show();
            });
        }

        // Auto-check on page load
        runRobotsCheck();

        // Re-check button
        $('#llm-txt-recheck-robots').on('click', function() {
            runRobotsCheck();
        });
    });
    </script>
    <?php
    // End of the settings screen markup.
}

