From b8bb698f603c215f2e542e84fe97302acff00c7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Menrath?= Date: Fri, 3 Jan 2025 19:31:58 +0100 Subject: [PATCH] Add backfilling via parsing of outbox --- .../collection/class-event-sources.php | 4 +- includes/activitypub/handler/class-accept.php | 8 + .../activitypub/model/class-event-source.php | 27 ++- includes/class-event-sources.php | 47 +++- includes/class-outbox-parser.php | 211 ++++++++++++++++++ 5 files changed, 286 insertions(+), 11 deletions(-) create mode 100644 includes/class-outbox-parser.php diff --git a/includes/activitypub/collection/class-event-sources.php b/includes/activitypub/collection/class-event-sources.php index b0176a8..209cf45 100644 --- a/includes/activitypub/collection/class-event-sources.php +++ b/includes/activitypub/collection/class-event-sources.php @@ -289,6 +289,8 @@ class Event_Sources { * @return WP_Post|false|null Post data on success, false or null on failure. */ public static function remove_event_source( $actor ) { + self::delete_event_source_transients(); + $actor = Event_Source::get_by_id( $actor ); if ( ! $actor ) { @@ -314,7 +316,6 @@ class Event_Sources { // If the deletion was successful delete all transients regarding event sources. 
if ( $result ) { self::queue_unfollow_actor( $actor ); - self::delete_event_source_transients(); } return $result; @@ -356,6 +357,7 @@ class Event_Sources { 'paged' => $page, 'orderby' => 'ID', 'order' => 'DESC', + 'post_status' => array( 'publish', 'pending', 'draft', 'auto-draft', 'future', 'private', 'inherit' ), ); $args = wp_parse_args( $args, $defaults ); diff --git a/includes/activitypub/handler/class-accept.php b/includes/activitypub/handler/class-accept.php index 7cdf1e6..b7b2ac5 100644 --- a/includes/activitypub/handler/class-accept.php +++ b/includes/activitypub/handler/class-accept.php @@ -59,6 +59,14 @@ class Accept { return; } \update_post_meta( $post_id, '_event_bridge_for_activitypub_accept_of_follow', $activity['id'] ); + \wp_update_post( + array( + 'ID' => $post_id, + 'post_status' => 'publish', + ) + ); } + + \do_action( 'event_bridge_for_activitypub_backfill_events', $activity['actor'] ); } } diff --git a/includes/activitypub/model/class-event-source.php b/includes/activitypub/model/class-event-source.php index f676146..b2249a2 100644 --- a/includes/activitypub/model/class-event-source.php +++ b/includes/activitypub/model/class-event-source.php @@ -75,6 +75,31 @@ class Event_Source extends Actor { return $this->id; } + /** + * Get the outbox URL, falling back to the `outbox` entry of the actor JSON stored in the `activitypub_actor_json` post meta. + * + * @return ?string The outbox URL, or null if it is neither set on the object nor present in the stored actor JSON. + */ + public function get_outbox() { + if ( $this->outbox ) { + return $this->outbox; + } + + $actor_json = \get_post_meta( $this->get__id(), 'activitypub_actor_json', true ); + + if ( ! $actor_json ) { + return null; + } + + $actor = \json_decode( $actor_json, true ); + + if ( ! isset( $actor['outbox'] ) ) { + return null; + } + + return $actor['outbox']; + } + /** * Get the WordPress post which stores the Event Source by the ActivityPub actor id of the event source. 
* @@ -226,7 +251,7 @@ class Event_Source extends Actor { 'post_type' => Event_Sources::POST_TYPE, 'post_name' => esc_url_raw( $this->get_id() ), 'post_excerpt' => sanitize_text_field( wp_kses( $this->get_summary(), 'user_description' ) ), - 'post_status' => 'publish', + 'post_status' => 'pending', 'meta_input' => $this->get_post_meta_input(), ); diff --git a/includes/class-event-sources.php b/includes/class-event-sources.php index 98cb907..f6f0f69 100644 --- a/includes/class-event-sources.php +++ b/includes/class-event-sources.php @@ -74,6 +74,9 @@ class Event_Sources { // Add the actors followed by the event sources feature to the `follow` collection of the used ActivityPub actor. \add_filter( 'activitypub_rest_following', array( self::class, 'add_event_sources_to_follow_collection' ), 10, 2 ); + + // Add action for backfilling the events. + Outbox_Parser::init(); } @@ -387,6 +390,7 @@ class Event_Sources { public static function validate_event_object( $valid, $param, $request ) { $json_params = $request->get_json_params(); + // Check if we should continue with the validation. if ( isset( $json_params['object']['type'] ) && 'Event' === $json_params['object']['type'] ) { $valid = true; } else { @@ -405,9 +409,17 @@ class Event_Sources { return $valid; } - $object = $json_params['object']; + return self::is_valid_activitypub_event_object( $json_params['object'] ); + } - if ( ! is_array( $object ) ) { + /** + * Check if the object is a valid ActivityPub event. + * + * @param array $event_object The (event) object as an associative array. + * @return bool|WP_Error True if the object is a valid ActivityPub Event, false if it is not an array, WP_Error if a required attribute is missing or the startTime check fails. + */ + public static function is_valid_activitypub_event_object( $event_object ) { + if ( ! 
is_array( $event_object ) ) { return false; } @@ -417,21 +429,21 @@ class Event_Sources { 'name', ); - if ( array_intersect( $required, array_keys( $object ) ) !== $required ) { + if ( array_intersect( $required, array_keys( $event_object ) ) !== $required ) { return new WP_Error( 'event_bridge_for_activitypub_invalid_event_object', __( 'The Event object is missing a required attribute.', 'event-bridge-for-activitypub' ) ); } - if ( ! self::is_valid_activitypub_time_string( $object['startTime'] ) ) { + if ( ! self::is_valid_activitypub_time_string( $event_object['startTime'] ) ) { return new WP_Error( 'event_bridge_for_activitypub_event_object_is_not_in_the_future', __( 'Ignoring event that has already started.', 'event-bridge-for-activitypub' ) ); } - return $valid; + return true; } /** @@ -461,12 +473,14 @@ class Event_Sources { /** * Check if a given DateTime is already passed. * - * @param string $time_string The ActivityPub like time string. + * @param string|DateTime $time The ActivityPub like time string or DateTime object. * @return bool */ - public static function is_time_passed( $time_string ) { - // Create a DateTime object from the ActivityPub time string. - $time = new DateTime( $time_string, new DateTimeZone( 'UTC' ) ); + public static function is_time_passed( $time ) { + if ( ! $time instanceof DateTime ) { + // Create a DateTime object from the ActivityPub time string. + $time = new DateTime( $time, new DateTimeZone( 'UTC' ) ); + } // Get the current time in UTC. $current_time = new DateTime( 'now', new DateTimeZone( 'UTC' ) ); @@ -475,6 +489,21 @@ class Event_Sources { return $time < $current_time; } + /** + * Determine whether an Event is an ongoing or future event. + * + * @param array $event_object The ActivityPub Event as an associative array. 
+ * @return bool True if the event has not ended yet. + */ + public static function is_ongoing_or_future_event( $event_object ) { + if ( isset( $event_object['endTime'] ) ) { + $time = $event_object['endTime']; + } else { + $time = ( new DateTime( $event_object['startTime'], new DateTimeZone( 'UTC' ) ) )->modify( '+3 hours' ); // Assume a three hour duration when no endTime is set; a DateTime cannot be added to an integer. + } + return ! self::is_time_passed( $time ); + } + /** * Check that an ActivityPub actor is an event source (i.e. it is followed by the ActivityPub blog actor). * diff --git a/includes/class-outbox-parser.php b/includes/class-outbox-parser.php new file mode 100644 index 0000000..5ee882a --- /dev/null +++ b/includes/class-outbox-parser.php @@ -0,0 +1,211 @@ + 0 && count( $parsed_events ) >= $max_items ) { + break; + } + + // Check if it is a create or update Activity. + if ( ! self::is_create_or_update_activity( $activity ) ) { + continue; + } + + // If no object is set we cannot process anything. + if ( ! isset( $activity['object'] ) ) { + continue; + } + + // Check if the Event object meets the minimum requirements and is valid. + $is_valid = Event_Sources::is_valid_activitypub_event_object( $activity['object'] ); + if ( ! $is_valid || \is_wp_error( $is_valid ) ) { + continue; + } + + // Check if the event is in the future or ongoing. + if ( Event_Sources::is_ongoing_or_future_event( $activity['object'] ) ) { + $parsed_events[] = $activity['object']; + } + } + + return $parsed_events; + } + + /** + * Import events from the items of an outbox. + * + * @param array $items The items/orderedItems as an associative array. + * @param string $actor The actor that owns the items. + * @param int $limit The limit of how many events to save locally. + * @return int The number of saved events (at least attempted). + */ + public static function import_events_from_items( $items, $actor, $limit = -1 ) { + $events = self::parse_items_for_events( $items, $limit ); + + $transmogrifier = Setup::get_transmogrifier(); + + if ( ! 
$transmogrifier ) { + return 0; + } + + $count = 0; + + foreach ( $events as $event ) { + $transmogrifier->save( $event, $actor ); + ++$count; + if ( $limit > 0 && $count >= $limit ) { + break; + } + } + + return count( $events ); + } + + /** + * Schedule the import of events from an outbox OrderedCollection or OrderedCollectionPage. + * + * @param string $url The url of the current page or outbox. + * @param string $actor The ActivityPub ID/URL of the actor that owns the outbox. + * @param int $delay The delay of the current time in seconds. + * @return bool|WP_Error|null Result of scheduling the single event, or null if this import is already scheduled. + */ + public static function queue_importing_from_outbox( $url, $actor, $delay = 10 ) { + $hook = 'event_bridge_for_activitypub_import_events_from_outbox'; + $args = array( $url, $actor ); + + if ( \wp_next_scheduled( $hook, $args ) ) { + return; + } + + return \wp_schedule_single_event( \time() + $delay, $hook, $args ); + } + + /** + * Initialize the backfilling of events via the outbox of an ActivityPub actor. + * + * @param string $actor The ActivityPub ID of the actor which owns the outbox. + * @return bool|WP_Error|null Result of queueing the import, or null if no outbox URL could be determined. + */ + public static function backfill_events( $actor ) { + // Initiate parsing of outbox collection. + $event_source = Event_Source::get_by_id( $actor ); + $outbox_url = ( $event_source && ! \is_wp_error( $event_source ) ) ? $event_source->get_outbox() : null; // The lookup may fail for an unknown actor; never call a method on a non-object. + if ( ! $outbox_url ) { + return; + } + return self::queue_importing_from_outbox( $outbox_url, $actor ); + } + + /** + * Import events from an outbox: OrderedCollection or OrderedCollectionPage. + * + * @param string $url The url of the current page or outbox. + * @param string $actor The ActivityPub ID/URL of the actor that owns the outbox. + * @return void + */ + public static function import_events_from_outbox( $url, $actor ) { + $response = Http::get( $url ); + + if ( \is_wp_error( $response ) ) { + return; + } + + $outbox = \wp_remote_retrieve_body( $response ); + $outbox = \json_decode( $outbox, true ); + + // Validate the outbox type and structure. + if ( ! is_array( $outbox ) || ! 
isset( $outbox['type'] ) ) { + return; + } + + $current_count = (int) \get_option( "event_bridge_for_activitypub_backfill_count_{$actor}", 0 ); + + if ( $current_count >= self::MAX_EVENTS_TO_IMPORT ) { + // Stop importing as the limit is reached. + return; + } + + // Process orderedItems if they exist (non-paginated outbox). + if ( isset( $outbox['orderedItems'] ) && is_array( $outbox['orderedItems'] ) ) { + $current_count += self::import_events_from_items( $outbox['orderedItems'], $actor, self::MAX_EVENTS_TO_IMPORT - $current_count ); + } + + // Persist the count under the same option key it is read from, so the limit holds across pages. NOTE(review): no reset of this option is visible here - confirm it is cleared when an event source is removed. + \update_option( "event_bridge_for_activitypub_backfill_count_{$actor}", $current_count ); + + // If the count is already exceeded abort here. + if ( $current_count >= self::MAX_EVENTS_TO_IMPORT ) { + return; + } + + // Determine the pagination URL based on the outbox type. + $pagination_url = null; + + if ( 'OrderedCollection' === $outbox['type'] && ! empty( $outbox['first'] ) && is_string( $outbox['first'] ) ) { + $pagination_url = $outbox['first']; + } elseif ( 'OrderedCollectionPage' === $outbox['type'] && ! empty( $outbox['next'] ) && is_string( $outbox['next'] ) ) { + $pagination_url = $outbox['next']; + } + + // Trigger the action if a pagination URL is found. + if ( $pagination_url ) { + self::queue_importing_from_outbox( $pagination_url, $actor ); + } + } +}