<?php
/**
 * Main Checker class.
 *
 * @package dvic\classes
 */

namespace DVIC;

/**
 * Main Checker class.
 */
final class Checker {

	/**
	 * Taxonomies with more top-level terms than this are left out of the filters data.
	 *
	 * @var int
	 */
	const MAX_TERMS_PER_TAXONOMY = 250;

	/**
	 * Build the data used by the filters: one entry per allowed post type.
	 * Each entry exposes the post type name/label, its published posts count
	 * and its taxonomies. Each taxonomy exposes its non-empty terms.
	 *
	 * @return array List of entries with the keys 'post_type', 'count_posts' and 'taxonomies'.
	 */
	public static function get_posts_data_for_filters() {
		$output = array();
		foreach ( (array) self::get_post_types() as $post_type_name ) {
			$taxonomies_data = array();
			foreach ( (array) self::get_object_taxonomies( $post_type_name ) as $taxonomy_name ) {
				$count_terms = self::count_terms( $taxonomy_name );
				// Skip taxonomies without any term and taxonomies with too many terms.
				if ( 0 === $count_terms || $count_terms > self::MAX_TERMS_PER_TAXONOMY ) {
					continue;
				}
				$terms             = self::get_terms( $taxonomy_name );
				$count_terms_posts = 0;
				foreach ( $terms as $term ) {
					$count_terms_posts += intval( $term->count );
				}
				$taxonomies_data[] = array(
					'name'        => $taxonomy_name,
					'label'       => get_taxonomy( $taxonomy_name )->label,
					'count_posts' => $count_terms_posts,
					'terms'       => $terms,
				);
			}
			$output[] = array(
				'post_type'   => self::get_post_type_object( $post_type_name ),
				'count_posts' => intval( wp_count_posts( $post_type_name )->publish ),
				'taxonomies'  => $taxonomies_data,
			);
		}
		return $output;
	}

	/**
	 * Count the top-level terms (empty ones included) of a given $taxonomy_name.
	 *
	 * @param string $taxonomy_name The name of the taxonomy we want to count terms of.
	 *
	 * @return int The count of top-level terms, 0 when the count cannot be retrieved.
	 */
	private static function count_terms( $taxonomy_name ) {
		$args  = array(
			'hide_empty' => false,
			'parent'     => 0,
		);
		$count = wp_count_terms( $taxonomy_name, $args );
		// wp_count_terms() can return a WP_Error, which must not be cast to int.
		if ( is_wp_error( $count ) ) {
			return 0;
		}
		return intval( $count );
	}

	/**
	 * Get the name and the label of a given $post_type_name.
	 *
	 * @param string $post_type_name The name of the post_type we want data from.
	 *
	 * @return array An array with the name and the label of the post_type.
	 */
	private static function get_post_type_object( $post_type_name ) {
		$post_type_object = get_post_type_object( $post_type_name );
		return array(
			'name'  => $post_type_object->name,
			'label' => $post_type_object->labels->name,
		);
	}

	/**
	 * Get the non-empty terms of a given taxonomy name.
	 *
	 * @param string $taxonomy_name The taxonomy name to get terms from.
	 *
	 * @return array List of all non-empty terms, an empty array when terms cannot be retrieved.
	 */
	private static function get_terms( $taxonomy_name ) {
		$terms = get_terms(
			array(
				'taxonomy'   => $taxonomy_name,
				'hide_empty' => true,
			)
		);
		// get_terms() can return a WP_Error, callers expect a list they can iterate on.
		if ( is_wp_error( $terms ) ) {
			return array();
		}
		return $terms;
	}

	/**
	 * Get all post types to check broken links into.
	 * Only registered post types that are part of the allowed list are kept.
	 *
	 * @return array List of the allowed post types that are registered.
	 */
	private static function get_post_types() {
		$allowed_post_types = array( 'page', 'post', 'video', 'videos', 'blog', 'photos' );
		return array_intersect( get_post_types(), $allowed_post_types );
	}

	/**
	 * Get the taxonomy names attached to a post type.
	 * The nav_menu, link_category and post_format taxonomies are excluded.
	 *
	 * @param string $post_type The post type to retrieve taxonomies from.
	 *
	 * @return array The list of taxonomy names.
	 */
	private static function get_object_taxonomies( $post_type ) {
		$taxonomies_to_exclude = array( 'nav_menu', 'link_category', 'post_format' );
		return array_values( array_diff( get_object_taxonomies( $post_type ), $taxonomies_to_exclude ) );
	}

	/**
	 * Count published posts across all allowed post types.
	 *
	 * @return int Number of published posts.
	 */
	public static function get_count_posts() {
		$published_posts = 0;
		foreach ( self::get_post_types() as $post_type ) {
			$published_posts += intval( wp_count_posts( $post_type )->publish );
		}
		return $published_posts;
	}

	/**
	 * Count published posts that have been scanned.
	 * A post is considered scanned when it has a DVIC_checked_date meta.
	 *
	 * @return int Number of scanned published posts.
	 */
	public static function get_count_scanned_posts() {
		$args = array(
			'numberposts'  => -1,
			'post_status'  => 'publish',
			'post_type'    => self::get_post_types(),
			'fields'       => 'ids',
			'meta_key'     => 'DVIC_checked_date',
			'meta_compare' => 'EXISTS',
		);
		return count( get_posts( $args ) );
	}

	/**
	 * Get formatted posts with broken links.
	 * Only the published posts that have a DVIC_broken_links meta are returned, up to 100.
	 *
	 * @see get_formatted_post()
	 *
	 * @return array An array of formatted posts that contain broken links.
	 */
	public static function get_formatted_posts_with_broken_links() {
		$args            = array(
			'numberposts'  => 100,
			'post_status'  => 'publish',
			'post_type'    => self::get_post_types(),
			'meta_key'     => 'DVIC_broken_links',
			'meta_compare' => 'EXISTS',
		);
		$formatted_posts = array();
		foreach ( (array) get_posts( $args ) as $post ) {
			$formatted_posts[] = self::get_formatted_post( $post->ID );
		}
		return $formatted_posts;
	}

	/**
	 * Format a post given its ID.
	 * Formatted posts are used in DVIC admin UI.
	 *
	 * @param mixed $post_id The ID of the post to format.
	 *
	 * @return array An array with the formatted post data.
	 */
	public static function get_formatted_post( $post_id ) {
		return array(
			'ID'          => $post_id,
			'type'        => get_post_type( $post_id ),
			'title'       => get_the_title( $post_id ),
			'scanned'     => get_post_meta( $post_id, 'DVIC_checked_date', true ),
			'thumbnail'   => get_the_post_thumbnail_url( $post_id ),
			'brokenLinks' => get_post_meta( $post_id, 'DVIC_broken_links', true ),
			'isChecked'   => false,
			'actions'     => array(
				'isBeingScanned' => false,
				'isBeingDrafted' => false,
				'isBeingDeleted' => false,
			),
		);
	}

	/**
	 * Get a list of excluded meta keys we don't want to find links in.
	 *
	 * @return array The list of meta keys to exclude.
	 */
	public static function get_excluded_meta_keys() {
		return array(
			'DVIC_checked_date',
			'DVIC_broken_links',
			'_elementor_data',
		);
	}

	/**
	 * Check if we exclude to find links in a given post meta key.
	 *
	 * @param string $meta_key The meta key to check.
	 *
	 * @return bool True if the meta key must be excluded, false if not.
	 */
	public static function is_excluded_meta_key( $meta_key ) {
		return in_array( $meta_key, self::get_excluded_meta_keys(), true );
	}

	/**
	 * Find all broken links in posts meta given a post ID.
	 * /!\ FIND BROKEN LINKS IN POST META ONLY.
	 * /!\ DOESN'T FIND BROKEN LINKS IN POST CONTENT.
	 * The script will sleep for 1 second for each Pornhub link to check to prevent temporary ban from Pornhub API.
	 *
	 * @param mixed $post_id                      The ID of the post to scan.
	 * @param bool  $only_videos_links            Find only videos links if set to true, all links if not.
	 * @param bool  $stop_after_broken_link_found Stop scanning as soon as a first broken link is found.
	 *
	 * @return array An array that contains 3 keys:
	 * - links_tested_counter: an int counter of how many links have been tested in the post
	 * - has_broken_links: a boolean to know if there are some broken links in the post
	 * - formatted_post: an array that contain formatted post data for the UI (@see get_formatted_post())
	 */
	public static function find_broken_links_in_post( $post_id, $only_videos_links = false, $stop_after_broken_link_found = false ) {
		$all_post_metas       = get_post_meta( $post_id );
		$links_tested_counter = 0;
		$broken_links         = array();
		foreach ( (array) $all_post_metas as $meta_key => $array_meta_value ) {
			if ( self::is_excluded_meta_key( $meta_key ) ) {
				continue;
			}
			foreach ( (array) $array_meta_value as $meta_value ) {
				$links_in_meta_value = $only_videos_links ? self::find_videos_links_in_content( $meta_value ) : self::find_links_in_content( $meta_value );
				foreach ( (array) $links_in_meta_value as $link_url ) {
					$links_tested_counter++;
					$dvic_link = new \Link( $link_url );
					if ( 'pornhub' === $dvic_link->get_tube_id() ) {
						sleep( 1 );
					}
					if ( ! $dvic_link->is_broken() ) {
						continue;
					}
					$broken_links[] = array(
						'customField' => $meta_key,
						'code'        => $meta_value,
						'brokenLink'  => $link_url,
					);
					// Leave the three loops at once: no other link has to be tested (nor slept for).
					if ( true === $stop_after_broken_link_found ) {
						break 3;
					}
				}
			}
		}
		// NOTE(review): this evaluates code returned by WPSCORE()->eval_product_data().
		// The evaluated code runs in the scope of this method, that is why the local variables
		// ($post_id, $broken_links, ...) keep their names. Presumably it saves the scan result
		// as post meta - confirm, and make sure the evaluated code can never come from untrusted data.
		eval( WPSCORE()->eval_product_data( 'DVIC', 'update_post_meta' ) );
		return array(
			'links_tested_counter' => $links_tested_counter,
			'has_broken_links'     => ! empty( $broken_links ),
			'formatted_post'       => self::get_formatted_post( $post_id ),
		);
	}

	/**
	 * Extract all links from a given content string.
	 * Duplicated links and ftp/file links are ignored.
	 *
	 * @param string $content The content to extract links from.
	 *
	 * @return array An array of strings with all links urls.
	 */
	public static function find_links_in_content( $content = '' ) {
		$output      = array();
		$links_regex = '/(?<link>(?:(?:https?|ftp|file)?:?\/\/|www\.|ftp\.)(?:\([-\w0-9+&@#\/%=~_|$?!:;,.]*\)|[-\w+&@#\/%=~_|$?!:;,.])*(?:\([-A-Z0-9+&@#\/%=~_|$?!:;,.]*\)|[A-Z0-9+&@#\/%=~_|$]))/imu';
		preg_match_all( $links_regex, $content, $links_matches, PREG_SET_ORDER, 0 );
		foreach ( $links_matches as $match ) {
			$link = $match['link'];
			// ignore already saved links and ftp/file links.
			if ( in_array( $link, $output, true ) || self::is_ftp_or_file_link( $link ) ) {
				continue;
			}
			$output[] = $link;
		}
		return $output;
	}

	/**
	 * Extract all videos links only from a given content string.
	 * Iframes sources come first, then all the other links that have a video extension.
	 *
	 * @param string $content The content to extract links from.
	 *
	 * @return array An array of strings with all links urls.
	 */
	public static function find_videos_links_in_content( $content ) {
		$output = array();
		// proceed iframes links first.
		$iframes_links_regex = '/<iframe.*src=(?:"|\')(?<link>.*)(?:"|\')/imuU';
		preg_match_all( $iframes_links_regex, $content, $iframes_links_matches, PREG_SET_ORDER, 0 );
		foreach ( $iframes_links_matches as $match ) {
			$link = $match['link'];
			// ignore already saved links and ftp/file links.
			if ( in_array( $link, $output, true ) || self::is_ftp_or_file_link( $link ) ) {
				continue;
			}
			$output[] = $link;
		}
		// proceed all links then.
		foreach ( self::find_links_in_content( $content ) as $link ) {
			$dvic_link = new \Link( $link );
			// ignore already saved links and non video links.
			if ( in_array( $link, $output, true ) || ! $dvic_link->has_video_extention() ) {
				continue;
			}
			$output[] = $link;
		}
		return $output;
	}

	/**
	 * Check if a link starts with "file" or "ftp".
	 * The check is case-insensitive, like the regular expressions used to extract the links.
	 *
	 * @param string $link The link to check.
	 *
	 * @return bool True if the link starts with "file" or "ftp", false if not.
	 */
	private static function is_ftp_or_file_link( $link ) {
		return 0 === stripos( $link, 'file' ) || 0 === stripos( $link, 'ftp' );
	}
}
