Extract chats as structured data.

* add_chat_detection_format() — to add a chat regex pattern
* get_content_chat() — to grab a chat from content
* get_the_chat() — grab the chat from the current (or passed) post
* the_chat() — output the chat in formatted HTML
* paginate_content() — puts the <!--nextpage--> splitting stuff into a function
* get_paged_content() — grabs a page of raw content, needed to paginate chats properly

see #23625. props wonderboymusic, lancewillett.

git-svn-id: http://core.svn.wordpress.org/trunk@23804 1a063a9b-81f0-0310-95a4-ce76da25c4cd
2024-06-25 22:35:02 +02:00 · 2013-03-27 08:31:12 +00:00 · 2013-03-27 08:31:12 +00:00 · e11cb17f77
commit e11cb17f77
parent 6bf47b869f
2 changed files with 258 additions and 0 deletions
--- a/wp-includes/post-formats.php
+++ b/wp-includes/post-formats.php
@ -391,6 +391,219 @@ function post_formats_compat( $content, $id = 0 ) {
 	return $output;
 }

+/**
+ * Register a chat format so that `get_content_chat()` can recognise it.
+ *
+ * Formats live in the `$_wp_chat_parsers` global, keyed by name. A newly
+ * registered format is placed ahead of the ones already present, and
+ * registering an existing name again replaces the earlier definition.
+ *
+ * @since 3.6.0
+ *
+ * @global array $_wp_chat_parsers
+ * @param string $name Unique identifier for the chat format. Example: IRC
+ * @param string $newline_regex RegEx matching the start of a new chat line, typically where a new "username:" appears
+ *	The parser will handle up to 3 matched expressions
+ *	$matches[0] = the string before the user's message starts
+ *	$matches[1] = the time of the message, if present
+ *	$matches[2] = the author/username
+ *	OR
+ *	$matches[0] = the string before the user's message starts
+ *	$matches[1] = the author/username
+ * @param string $delimiter_regex RegEx used to decide where the username syntax ends and the chat message begins
+ */
+function add_chat_detection_format( $name, $newline_regex, $delimiter_regex ) {
+	global $_wp_chat_parsers;
+
+	$registered = empty( $_wp_chat_parsers ) ? array() : $_wp_chat_parsers;
+	$format = array( $name => array( $newline_regex, $delimiter_regex ) );
+
+	// Array union keeps the left-hand entry on a key clash, so the new format wins and is listed first.
+	$_wp_chat_parsers = $format + $registered;
+}
+// Built-in formats. Each registration is placed ahead of the earlier ones, so 'Skype' is tried before 'IM'.
+// 'IM': "author:" at the start of the text.
+add_chat_detection_format( 'IM', '#^([^:]+):#', '#[:]#' );
+// 'Skype': "[bracketed prefix] author:" at the start of the text; the bracketed part is captured as the time.
+add_chat_detection_format( 'Skype', '#^(\[.+?\])\s([^:]+):#', '#[:]#' );
+
+/**
+ * Deliberately interpret passed content as a chat transcript that is optionally
+ * followed by commentary
+ *
+ * If the content does not contain username syntax, assume that it does not contain
+ * chat logs and return an empty array.
+ *
+ * The content is split into stanzas at blank lines. Within a stanza, a line that
+ * starts a new message opens a new row; any other non-blank line is appended to
+ * the message currently being collected. Parsing stops at a blank line whose
+ * following line does not match the format's delimiter expression.
+ *
+ * @since 3.6.0
+ *
+ * Example:
+ *
+ * One stanza of chat:
+ * Scott: Hey, let's chat!
+ * Helen: No.
+ *
+ * $stanzas = array(
+ *     array(
+ *         array(
+ *             'time' => '',
+ *             'author' => 'Scott',
+ *             'message' => "Hey, let's chat!"
+ *         ),
+ *         array(
+ *             'time' => '',
+ *             'author' => 'Helen',
+ *             'message' => 'No.'
+ *         )
+ *     )
+ * )
+ *
+ * @global array $_wp_chat_parsers Formats registered through add_chat_detection_format().
+ * @param string $content A string which might contain chat data. Passed by reference.
+ * @param boolean $remove Whether to remove the found data from the passed content. When true,
+ *	$content is overwritten with the lines (after make_clickable()) from the last blank line
+ *	that was reached onwards.
+ * @return array A chat log as structured data
+ */
+function get_content_chat( &$content, $remove = false ) {
+	global $_wp_chat_parsers;
+
+	$trimmed = trim( $content );
+	if ( '' === $trimmed || empty( $_wp_chat_parsers ) )
+		return array();
+
+	// The first registered format whose line expression matches the content is used.
+	$matched_parser = false;
+	foreach ( $_wp_chat_parsers as $parser ) {
+		// Skip malformed registrations instead of silencing the resulting notices.
+		if ( empty( $parser[0] ) || empty( $parser[1] ) )
+			continue;
+
+		if ( preg_match( $parser[0], $trimmed ) ) {
+			$matched_parser = $parser;
+			break;
+		}
+	}
+
+	if ( false === $matched_parser )
+		return array();
+
+	list( $newline_regex, $delimiter_regex ) = $matched_parser;
+
+	$last_index = 0;
+	$stanzas = array();
+	$lines = explode( "\n", make_clickable( $trimmed ) );
+
+	$author = $time = '';
+	$data = array();
+	$stanza = array();
+
+	foreach ( $lines as $index => $line ) {
+		$line = trim( $line );
+
+		// Compare against '' rather than using empty(), which would treat a line of "0" as blank.
+		if ( '' === $line ) {
+			// A blank line closes the stanza being collected.
+			if ( '' !== $author ) {
+				$stanza[] = array(
+					'time' => $time,
+					'author' => $author,
+					'message' => trim( join( ' ', $data ) )
+				);
+			}
+
+			// Several blank lines in a row must not produce empty stanzas.
+			if ( ! empty( $stanza ) )
+				$stanzas[] = $stanza;
+
+			$last_index = $index;
+			$stanza = array();
+			$author = $time = '';
+			$data = array();
+
+			// The next line carries no delimiter: treat the remainder as commentary and stop.
+			if ( ! empty( $lines[$index + 1] ) && ! preg_match( $delimiter_regex, $lines[$index + 1] ) )
+				break;
+
+			continue;
+		}
+
+		$matches = array();
+		$starts_message = false;
+		$has_time = false;
+		$author_match = '';
+
+		// Only look at the capture groups when the line expression actually matched.
+		if ( preg_match( $newline_regex, $line, $matches ) ) {
+			// Two groups mean "time, author"; a single group is the author alone.
+			$has_time = isset( $matches[2] ) && '' !== $matches[2];
+			if ( $has_time )
+				$author_match = $matches[2];
+			elseif ( isset( $matches[1] ) )
+				$author_match = $matches[1];
+
+			// assume username syntax if no whitespace is present
+			$has_ws = (bool) preg_match( '#\s#', $author_match );
+			// allow script-like stanzas: a name containing whitespace counts when the line stands alone
+			$is_script_line = $has_ws && empty( $lines[$index + 1] ) && empty( $lines[$index - 1] );
+
+			$starts_message = $has_time || ! $has_ws || $is_script_line;
+		}
+
+		if ( $starts_message ) {
+			if ( '' !== $author ) {
+				$stanza[] = array(
+					'time' => $time,
+					'author' => $author,
+					'message' => trim( join( ' ', $data ) )
+				);
+				$data = array();
+			}
+
+			$time = $has_time ? $matches[1] : '';
+			$author = $author_match;
+			// Strip only the matched prefix; the same text may legitimately recur inside the message.
+			$data[] = trim( (string) preg_replace( $newline_regex, '', $line, 1 ) );
+		} elseif ( preg_match( '#\S#', $line ) ) {
+			$data[] = $line;
+		}
+	}
+
+	// Flush whatever was still being collected when the lines ran out.
+	if ( '' !== $author ) {
+		$stanza[] = array(
+			'time' => $time,
+			'author' => $author,
+			'message' => trim( join( ' ', $data ) )
+		);
+	}
+
+	if ( ! empty( $stanza ) )
+		$stanzas[] = $stanza;
+
+	if ( $remove )
+		$content = trim( join( "\n", array_slice( $lines, $last_index ) ) );
+
+	return $stanzas;
+}
+
+/**
+ * Retrieve structured chat data from the current or passed post
+ *
+ * Only the page of the post content selected by get_paged_content() is parsed.
+ *
+ * @since 3.6.0
+ *
+ * @param int $id Optional. Post ID. When empty, get_post() is called without arguments.
+ * @return array Chat stanzas as returned by get_content_chat(); an empty array when
+ *	there is no post, no content or no chat.
+ */
+function get_the_chat( $id = 0 ) {
+	// The post is only read here, so no clone is needed (cloning a missing post would be fatal).
+	$post = empty( $id ) ? get_post() : get_post( $id );
+
+	// Bail on empty content too: get_paged_content() would otherwise fall back to the global post.
+	if ( empty( $post ) || empty( $post->post_content ) )
+		return array();
+
+	// get_content_chat() takes its first argument by reference, so it needs a real variable.
+	$content = (string) get_paged_content( $post->post_content );
+
+	return get_content_chat( $content );
+}
+
+/**
+ * Print the current post's chat as a definition list. Themes can use this as a
+ * template tag in place of the_content() for Chat post format templates.
+ *
+ * Every chat row becomes a <dt> (optional time plus author) followed by a <dd>
+ * holding the message.
+ *
+ * @since 3.6.0
+ *
+ * @uses get_the_chat()
+ *
+ * @print HTML
+ */
+function the_chat() {
+	// Placeholders: %1$s author slug, %2$s time markup, %3$s author name, %4$s message.
+	$row_format = '<dt class="chat-author chat-author-%1$s vcard">%2$s <cite class="fn">%3$s</cite>: </dt>
+					<dd class="chat-text">%4$s</dd>
+				';
+
+	$html = '<dl class="chat-transcript">';
+
+	foreach ( get_the_chat() as $stanza ) {
+		foreach ( $stanza as $row ) {
+			// The <time> element is only emitted when the row carries a time.
+			$time_markup = empty( $row['time'] ) ? '' : sprintf( '<time>%s</time>', esc_html( $row['time'] ) );
+
+			// NOTE(review): messages come from get_content_chat(), which runs make_clickable()
+			// on the content, so generated link markup is escaped here - confirm this is intended.
+			$html .= sprintf(
+				$row_format,
+				esc_attr( strtolower( $row['author'] ) ), // Slug.
+				$time_markup,
+				esc_html( $row['author'] ),
+				esc_html( $row['message'] )
+			);
+		}
+	}
+
+	echo $html . '</dl><!-- .chat-transcript -->';
+}
+
 /**
 * Extract a URL from passed content, if possible
 * Checks for a URL on the first line of the content or the first encountered href attribute.
--- a/wp-includes/query.php
+++ b/wp-includes/query.php
@ -3621,6 +3621,51 @@ function wp_old_slug_redirect() {
 		exit;
 	endif;
 }
+/**
+ * Break content into pages at every <!--nextpage--> marker
+ *
+ * A single newline directly before and/or after a marker is dropped, so the
+ * pages do not start or end with the line break that surrounded the marker.
+ *
+ * @since 3.6.0
+ *
+ * @param string $content Content to split
+ * @return array Paged content
+ */
+function paginate_content( $content ) {
+	$marker = '<!--nextpage-->';
+
+	// str_replace() works through the search list in order, so the widest variant is handled first.
+	$padded_markers = array(
+		"\n" . $marker . "\n",
+		"\n" . $marker,
+		$marker . "\n",
+	);
+
+	return explode( $marker, str_replace( $padded_markers, $marker, $content ) );
+}
+
+/**
+ * Return content offset by $page
+ *
+ * The content is split with paginate_content() and the requested page is
+ * returned. When the requested page does not exist, the first page is returned.
+ *
+ * @since 3.6.0
+ *
+ * @global int $page Current page number. Set to 1 by this function when it is empty.
+ * @param string $content Optional. Content to paginate. When empty, the content of the
+ *	post returned by get_post() is used.
+ * @param int $paged Optional. 1-based page number to return. Defaults to the global $page.
+ * @return string The requested page of raw content, or an empty string when there is
+ *	neither content nor a post to take it from.
+ */
+function get_paged_content( $content = null, $paged = null ) {
+	global $page;
+	if ( empty( $page ) )
+		$page = 1;
+
+	// Normalise to an integer so the offset arithmetic below cannot choke on non-numeric input.
+	$paged = empty( $paged ) ? (int) $page : (int) $paged;
+
+	if ( empty( $content ) ) {
+		$post = get_post();
+		// Return the documented string type rather than null.
+		if ( empty( $post ) )
+			return '';
+
+		$content = $post->post_content;
+	}
+
+	$pages = paginate_content( $content );
+	if ( isset( $pages[$paged - 1] ) )
+		return $pages[$paged - 1];
+
+	// Out-of-range page: fall back to the first page.
+	return reset( $pages );
+}

 /**
 * Set up global post data.