From e11cb17f77bc01f8224df76c0850f96715a8cbfa Mon Sep 17 00:00:00 2001 From: Mark Jaquith Date: Wed, 27 Mar 2013 08:31:12 +0000 Subject: [PATCH] Extract chats as structured data. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add_chat_detection_format() — to add a chat regex pattern * get_content_chat() — to grab a chat from content * get_the_chat() — grab the chat from the current (or passed) post * the_chat() — output the chat in formatted HTML * paginate_content() — puts the splitting stuff into a function * get_paged_content() — grabs a page of raw content, needed to paginate chats properly see #23625. props wonderboymusic, lancewillett. git-svn-id: http://core.svn.wordpress.org/trunk@23804 1a063a9b-81f0-0310-95a4-ce76da25c4cd --- wp-includes/post-formats.php | 213 +++++++++++++++++++++++++++++++++++ wp-includes/query.php | 45 ++++++++ 2 files changed, 258 insertions(+) diff --git a/wp-includes/post-formats.php b/wp-includes/post-formats.php index ec68932745..a37eeaab88 100644 --- a/wp-includes/post-formats.php +++ b/wp-includes/post-formats.php @@ -391,6 +391,219 @@ function post_formats_compat( $content, $id = 0 ) { return $output; } +/** + * Add chat detection support to the `get_content_chat()` chat parser + * + * @since 3.6.0 + * + * @global array $_wp_chat_parsers + * @param string $name Unique identifier for chat format. 
Example: IRC + * @param string $newline_regex RegEx to match the start of a new line, typically when a new "username:" appears + * The parser will handle up to 3 matched expressions + * $matches[0] = the string before the user's message starts + * $matches[1] = the time of the message, if present + * $matches[2] = the author/username + * OR + * $matches[0] = the string before the user's message starts + * $matches[1] = the author/username + * @param string $delimiter_regex RegEx to determine where to split the username syntax from the chat message + */ +function add_chat_detection_format( $name, $newline_regex, $delimiter_regex ) { + global $_wp_chat_parsers; + + if ( empty( $_wp_chat_parsers ) ) + $_wp_chat_parsers = array(); + + $_wp_chat_parsers = array( $name => array( $newline_regex, $delimiter_regex ) ) + $_wp_chat_parsers; +} +add_chat_detection_format( 'IM', '#^([^:]+):#', '#[:]#' ); +add_chat_detection_format( 'Skype', '#^(\[.+?\])\s([^:]+):#', '#[:]#' ); + +/** + * Deliberately interpret passed content as a chat transcript that is optionally + * followed by commentary + * + * If the content does not contain username syntax, assume that it does not contain + * chat logs and return + * + * @since 3.6.0 + * + * Example: + * + * One stanza of chat: + * Scott: Hey, let's chat! + * Helen: No. + * + * $stanzas = array( + * array( + * array( + * 'time' => '', + * 'author' => 'Scott', + * 'message' => "Hey, let's chat!" + * ), + * array( + * 'time' => '', + * 'author' => 'Helen', + * 'message' => 'No.' + * ) + * ) + * ) + * @param string $content A string which might contain chat data. + * @param boolean $remove Whether to remove the found data from the passed content. 
+ * @return array A chat log as structured data + */ +function get_content_chat( &$content, $remove = false ) { + global $_wp_chat_parsers; + + $trimmed = trim( $content ); + if ( empty( $trimmed ) ) + return array(); + + $has_match = false; + $matched_parser = false; + foreach ( $_wp_chat_parsers as $parser ) { + @list( $newline_regex ) = $parser; + if ( preg_match( $newline_regex, $trimmed ) ) { + $has_match = true; + $matched_parser = $parser; + break; + } + } + + if ( false === $matched_parser ) + return array(); + + @list( $newline_regex, $delimiter_regex ) = $parser; + + $last_index = 0; + $stanzas = array(); + $lines = explode( "\n", make_clickable( $trimmed ) ); + + $author = $time = ''; + $data = array(); + $stanza = array(); + + foreach ( $lines as $index => $line ) { + $line = trim( $line ); + + if ( empty( $line ) ) { + if ( ! empty( $author ) ) { + $stanza[] = array( + 'time' => $time, + 'author' => $author, + 'message' => join( ' ', $data ) + ); + } + + $stanzas[] = $stanza; + $last_index = $index; + $stanza = array(); + $author = $time = ''; + $data = array(); + if ( ! empty( $lines[$index + 1] ) && ! preg_match( $delimiter_regex, $lines[$index + 1] ) ) + break; + } + + $matches = array(); + $matched = preg_match( $newline_regex, $line, $matches ); + $author_match = empty( $matches[2] ) ? $matches[1] : $matches[2]; + // assume username syntax if no whitespace is present + $no_ws = $matched && ! preg_match( '#\s#', $author_match ); + // allow script-like stanzas + $has_ws = $matched && preg_match( '#\s#', $author_match ) && empty( $lines[$index + 1] ) && empty( $lines[$index - 1] ); + if ( $matched && ( ! empty( $matches[2] ) || ( $no_ws || $has_ws ) ) ) { + if ( ! empty( $author ) ) { + $stanza[] = array( + 'time' => $time, + 'author' => $author, + 'message' => join( ' ', $data ) + ); + $data = array(); + } + + $time = empty( $matches[2] ) ? 
'' : $matches[1]; + $author = $author_match; + $data[] = trim( str_replace( $matches[0], '', $line ) ); + } elseif ( preg_match( '#\S#', $line ) ) { + $data[] = $line; + } + } + + if ( ! empty( $author ) ) { + $stanza[] = array( + 'time' => $time, + 'author' => $author, + 'message' => trim( join( ' ', $data ) ) + ); + } + + if ( ! empty( $stanza ) ) + $stanzas[] = $stanza; + + if ( $remove ) + $content = trim( join( "\n", array_slice( $lines, $last_index ) ) ); + + return $stanzas; +} + +/** + * Retrieve structured chat data from the current or passed post + * + * @since 3.6.0 + * + * @param int $id Optional. Post ID + * @return array + */ +function get_the_chat( $id = 0 ) { + $post = empty( $id ) ? clone get_post() : get_post( $id ); + if ( empty( $post ) ) + return array(); + + $data = get_content_chat( get_paged_content( $post->post_content ) ); + if ( empty( $data ) ) + return array(); + + return $data; +} + +/** + * Output HTML for a given chat's structured data. Themes can use this as a + * template tag in place of the_content() for Chat post format templates. + * + * @since 3.6.0 + * + * @uses get_the_chat() + * + * @print HTML + */ +function the_chat() { + $output = '<dl class="chat-transcript">'; + + $stanzas = get_the_chat(); + + foreach ( $stanzas as $stanza ) { + foreach ( $stanza as $row ) { + $time = ''; + if ( ! empty( $row['time'] ) ) + $time = sprintf( '<time>%s</time>', esc_html( $row['time'] ) ); + + $output .= sprintf( + '<dt class="chat-author chat-author-%1$s vcard">%2$s <cite class="fn">%3$s</cite>: </dt> + <dd class="chat-text">%4$s</dd> + ', + esc_attr( strtolower( $row['author'] ) ), // Slug. + $time, + esc_html( $row['author'] ), + esc_html( $row['message'] ) + ); + } + } + + $output .= '</dl><!-- .chat-transcript -->'; + + echo $output; +} + /** * Extract a URL from passed content, if possible * Checks for a URL on the first line of the content or the first encountered href attribute. diff --git a/wp-includes/query.php b/wp-includes/query.php index 4f0ec99bab..370f443f12 100644 --- a/wp-includes/query.php +++ b/wp-includes/query.php @@ -3621,6 +3621,51 @@ function wp_old_slug_redirect() { exit; endif; } +/** + * Split the passed content by <!--nextpage--> + * + * @since 3.6.0 + * + * @param string $content Content to split + * @return array Paged content + */ +function paginate_content( $content ) { + $content = str_replace( "\n<!--nextpage-->\n", '<!--nextpage-->', $content ); + $content = str_replace( "\n<!--nextpage-->", '<!--nextpage-->', $content ); + $content = str_replace( "<!--nextpage-->\n", '<!--nextpage-->', $content ); + return explode( '<!--nextpage-->', $content); +} + +/** + * Return content offset by $page + * + * @since 3.6.0 + * + * @param string $content + * @return string + */ +function get_paged_content( $content = null, $paged = null ) { + global $page; + if ( empty( $page ) ) + $page = 1; + + if ( empty( $paged ) ) + $paged = $page; + + if ( empty( $content ) ) { + $post = get_post(); + if ( empty( $post ) ) + return; + + $content = $post->post_content; + } + + $pages = paginate_content( $content ); + if ( isset( $pages[$paged - 1] ) ) + return $pages[$paged - 1]; + + return reset( $pages ); +} /** * Set up global post data.