pomo performance improvements. Props nbachiyski. fixes #10165

git-svn-id: http://svn.automattic.com/wordpress/trunk@12174 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
ryan 2009-11-12 16:05:43 +00:00
parent 742349b291
commit e5a98d208a
2 changed files with 229 additions and 114 deletions

View File

@ -2,7 +2,7 @@
/**
* Class for working with MO files
*
* @version $Id: mo.php 221 2009-09-07 21:08:21Z nbachiyski $
* @version $Id: mo.php 293 2009-11-12 15:43:50Z nbachiyski $
* @package pomo
* @subpackage mo
*/
@ -21,10 +21,9 @@ class MO extends Gettext_Translations {
* @param string $filename MO file to load
*/
function import_from_file($filename) {
$reader = new POMO_CachedIntFileReader($filename);
if (isset($reader->error)) {
$reader = new POMO_FileReader($filename);
if (!$reader->is_resource())
return false;
}
return $this->import_from_reader($reader);
}
@ -113,61 +112,111 @@ class MO extends Gettext_Translations {
}
function import_from_reader($reader) {
$reader->setEndian('little');
$endian = MO::get_byteorder($reader->readint32());
if (false === $endian) {
$endian_string = MO::get_byteorder($reader->readint32());
if (false === $endian_string) {
return false;
}
$reader->setEndian($endian);
$reader->setEndian($endian_string);
$revision = $reader->readint32();
$total = $reader->readint32();
// get addresses of array of lenghts and offsets for original string and translations
$originals_lenghts_addr = $reader->readint32();
$translations_lenghts_addr = $reader->readint32();
$endian = ('big' == $endian_string)? 'N' : 'V';
$header = $reader->read(24);
if ($reader->strlen($header) != 24)
return false;
// parse header
$header = unpack("{$endian}revision/{$endian}total/{$endian}originals_lenghts_addr/{$endian}translations_lenghts_addr/{$endian}hash_length/{$endian}hash_addr", $header);
if (!is_array($header))
return false;
extract( $header );
// support revision 0 of MO format specs, only
if ($revision != 0)
return false;
// seek to data blocks
$reader->seekto($originals_lenghts_addr);
$originals_lenghts = $reader->readint32array($total * 2); // each of
$reader->seekto($translations_lenghts_addr);
$translations_lenghts = $reader->readint32array($total * 2);
$length = create_function('$i', 'return $i * 2 + 1;');
$offset = create_function('$i', 'return $i * 2 + 2;');
// read originals' indices
$originals_lengths_length = $translations_lenghts_addr - $originals_lenghts_addr;
if ( $originals_lengths_length != $total * 8 )
return false;
for ($i = 0; $i < $total; ++$i) {
$reader->seekto($originals_lenghts[$offset($i)]);
$original = $reader->read($originals_lenghts[$length($i)]);
$reader->seekto($translations_lenghts[$offset($i)]);
$translation = $reader->read($translations_lenghts[$length($i)]);
if ('' == $original) {
$originals = $reader->read($originals_lengths_length);
if ( $reader->strlen( $originals ) != $originals_lengths_length )
return false;
// read translations' indices
$translations_lenghts_length = $hash_addr - $translations_lenghts_addr;
if ( $translations_lenghts_length != $total * 8 )
return false;
$translations = $reader->read($translations_lenghts_length);
if ( $reader->strlen( $translations ) != $translations_lenghts_length )
return false;
// transform raw data into set of indices
$originals = $reader->str_split( $originals, 8 );
$translations = $reader->str_split( $translations, 8 );
// skip hash table
$strings_addr = $hash_addr + $hash_length * 4;
$reader->seekto($strings_addr);
$strings = $reader->read_all();
$reader->close();
for ( $i = 0; $i < $total; $i++ ) {
$o = unpack( "{$endian}length/{$endian}pos", $originals[$i] );
$t = unpack( "{$endian}length/{$endian}pos", $translations[$i] );
if ( !$o || !$t ) return false;
// adjust offset due to reading strings to separate space before
$o['pos'] -= $strings_addr;
$t['pos'] -= $strings_addr;
$original = $reader->substr( $strings, $o['pos'], $o['length'] );
$translation = $reader->substr( $strings, $t['pos'], $t['length'] );
if ('' === $original) {
$this->set_headers($this->make_headers($translation));
} else {
$this->add_entry($this->make_entry($original, $translation));
$entry = &$this->make_entry($original, $translation);
$this->entries[$entry->key()] = &$entry;
}
}
return true;
}
/**
* Build a Translation_Entry from original string and translation strings,
* found in a MO file
*
* @static
* @param string $original original string to translate from MO file. Might contain
* 0x04 as context separator or 0x00 as singular/plural separator
* @param string $translation translation string from MO file. Might contain
* 0x00 as a plural translations separator
*/
function &make_entry($original, $translation) {
$args = array();
$entry = & new Translation_Entry();
// look for context
$parts = explode(chr(4), $original);
if (isset($parts[1])) {
$original = $parts[1];
$args['context'] = $parts[0];
$entry->context = $parts[0];
}
// look for plural original
$parts = explode(chr(0), $original);
$args['singular'] = $parts[0];
$entry->singular = $parts[0];
if (isset($parts[1])) {
$args['plural'] = $parts[1];
$entry->is_plural = true;
$entry->plural = $parts[1];
}
// plural translations are also separated by \0
$args['translations'] = explode(chr(0), $translation);
$entry = & new Translation_Entry($args);
$entry->translations = explode(chr(0), $translation);
return $entry;
}
@ -178,7 +227,5 @@ class MO extends Gettext_Translations {
/**
 * Returns the value currently stored in the object's `_nplurals` property.
 *
 * NOTE(review): presumably this is the number of plural forms declared by
 * the loaded catalogue's Plural-Forms header -- the code that assigns
 * `_nplurals` is not visible here, confirm against it.
 *
 * @return mixed the stored `_nplurals` value, returned unchanged
 */
function get_plural_forms_count() {
return $this->_nplurals;
}
}
endif;

View File

@ -3,64 +3,182 @@
* Classes, which help reading streams of data from files.
* Based on the classes from Danilo Segan <danilo@kvota.net>
*
* @version $Id: streams.php 223 2009-09-07 21:20:13Z nbachiyski $
* @version $Id: streams.php 293 2009-11-12 15:43:50Z nbachiyski $
* @package pomo
* @subpackage streams
*/
if ( !class_exists( 'POMO_Reader' ) ):
/**
 * Base class for the pomo stream readers.
 *
 * Holds the state shared by all readers (endianness, current position,
 * mbstring-overload flag) and the helpers built on top of it. It does not
 * define read() itself: readint32() and readint32array() call $this->read(),
 * so a concrete subclass has to provide it.
 */
class POMO_Reader {

	/**
	 * Byte order used when decoding integers: 'big' or 'little'.
	 */
	var $endian = 'little';

	/**
	 * Position reported by pos().
	 */
	var $_pos = 0;

	/**
	 * True when mbstring overloads the string functions, in which case
	 * strlen()/substr() must be routed through mb_* with a single-byte
	 * encoding to keep byte semantics.
	 */
	var $is_overloaded = false;

	/**
	 * Detects mbstring string-function overloading and resets the position.
	 */
	function POMO_Reader() {
		$this->is_overloaded = ((ini_get("mbstring.func_overload") & 2) != 0) && function_exists('mb_substr');
		$this->_pos = 0;
	}

	/**
	 * Sets the endianness of the file.
	 *
	 * @param $endian string 'big' or 'little'
	 */
	function setEndian($endian) {
		$this->endian = $endian;
	}

	/**
	 * Reads a 32bit Integer from the Stream
	 *
	 * @return mixed The integer, corresponding to the next 32 bits from
	 * 	the stream, or false if there are not enough bytes or on error
	 */
	function readint32() {
		$bytes = $this->read(4);
		if (4 != $this->strlen($bytes))
			return false;
		// 'N' = unsigned 32 bit big endian, 'V' = unsigned 32 bit little endian
		$endian_letter = ('big' == $this->endian)? 'N' : 'V';
		$int = unpack($endian_letter, $bytes);
		return array_shift($int);
	}

	/**
	 * Reads an array of 32-bit Integers from the Stream
	 *
	 * @param integer count How many elements should be read
	 * @return mixed Array of integers (as returned by unpack(), i.e. indexed
	 * 	from 1) or false if there isn't enough data or on error
	 */
	function readint32array($count) {
		$bytes = $this->read(4 * $count);
		if (4*$count != $this->strlen($bytes))
			return false;
		$endian_letter = ('big' == $this->endian)? 'N' : 'V';
		return unpack($endian_letter.$count, $bytes);
	}

	/**
	 * Byte-wise substr(), also when mbstring overloads the string functions.
	 *
	 * @param string $string
	 * @param integer $start
	 * @param integer $length
	 * @return string
	 */
	function substr($string, $start, $length) {
		if ($this->is_overloaded) {
			return mb_substr($string, $start, $length, 'ascii');
		} else {
			return substr($string, $start, $length);
		}
	}

	/**
	 * Byte-wise strlen(), also when mbstring overloads the string functions.
	 *
	 * @param string $string
	 * @return integer
	 */
	function strlen($string) {
		if ($this->is_overloaded) {
			return mb_strlen($string, 'ascii');
		} else {
			return strlen($string);
		}
	}

	/**
	 * Splits a string into chunks of $chunk_size bytes.
	 *
	 * Uses the native str_split() when it exists, otherwise a manual loop.
	 *
	 * @param string $string
	 * @param integer $chunk_size
	 * @return mixed Array of chunks; the manual fallback returns false for a
	 * 	chunk size smaller than 1
	 */
	function str_split($string, $chunk_size) {
		if (!function_exists('str_split')) {
			// a non-positive step would never advance the loop below
			if ($chunk_size < 1)
				return false;
			$length = $this->strlen($string);
			$out = array();
			for ($i = 0; $i < $length; $i += $chunk_size)
				$out[] = $this->substr($string, $i, $chunk_size);
			return $out;
		} else {
			return str_split( $string, $chunk_size );
		}
	}

	/**
	 * @return integer the stored position
	 */
	function pos() {
		return $this->_pos;
	}

	/**
	 * The base reader has no underlying handle, so it always reports success.
	 *
	 * @return boolean true
	 */
	function is_resource() {
		return true;
	}

	/**
	 * Nothing to release in the base reader.
	 *
	 * @return boolean true
	 */
	function close() {
		return true;
	}
}
endif;
if ( !class_exists( 'POMO_FileReader' ) ):
/**
 * Reader backed by a file handle obtained with fopen().
 */
class POMO_FileReader extends POMO_Reader {

	/**
	 * File handle returned by fopen(), or false when opening failed.
	 */
	var $_f = false;

	/**
	 * Opens the file for reading.
	 *
	 * The result of fopen() is stored as-is; use is_resource() to find out
	 * whether opening succeeded.
	 *
	 * @param string $filename path of the file to read
	 */
	function POMO_FileReader($filename) {
		parent::POMO_Reader();
		// 'b': the data is binary, no line-ending translation must be applied
		$this->_f = fopen($filename, 'rb');
	}

	/**
	 * Reads up to $bytes bytes from the current file position.
	 *
	 * Does not touch the position reported by pos(); only seekto() updates it.
	 *
	 * @param integer $bytes maximum number of bytes to read
	 * @return mixed whatever fread() returns: the data read or false on failure
	 */
	function read($bytes) {
		return fread($this->_f, $bytes);
	}

	/**
	 * Moves the file pointer to an absolute offset.
	 *
	 * @param integer $pos offset from the beginning of the file
	 * @return boolean false if fseek() failed, true otherwise
	 */
	function seekto($pos) {
		if ( -1 == fseek($this->_f, $pos, SEEK_SET)) {
			return false;
		}
		$this->_pos = $pos;
		return true;
	}

	/**
	 * @return boolean whether the file was opened successfully
	 */
	function is_resource() {
		return is_resource($this->_f);
	}

	/**
	 * @return boolean whether the end of the file has been reached
	 */
	function feof() {
		return feof($this->_f);
	}

	/**
	 * Closes the file handle.
	 *
	 * @return boolean result of fclose()
	 */
	function close() {
		return fclose($this->_f);
	}

	/**
	 * Reads everything from the current file position up to the end of file.
	 *
	 * @return string the remaining contents; empty string if there is no
	 * 	valid handle
	 */
	function read_all() {
		$all = '';
		// without a valid handle feof() can never become true
		if ( !$this->is_resource() )
			return $all;
		while ( !$this->feof() ) {
			$chunk = $this->read(4096);
			// stop on a failed read instead of looping on a broken stream
			if ( false === $chunk )
				break;
			$all .= $chunk;
		}
		return $all;
	}
}
endif;
if ( !class_exists( 'POMO_StringReader' ) ):
/**
* Provides file-like methods for manipulating a string instead
* of a physical file.
*/
class POMO_StringReader {
var $_pos;
var $_str;
class POMO_StringReader extends POMO_Reader {
var $_str = '';
function POMO_StringReader($str = '') {
parent::POMO_Reader();
$this->_str = $str;
$this->_pos = 0;
$this->is_overloaded = ((ini_get("mbstring.func_overload") & 2) != 0) && function_exists('mb_substr');
}
function _substr($string, $start, $length) {
if ($this->is_overloaded) {
return mb_substr($string,$start,$length,'ascii');
} else {
return substr($string,$start,$length);
}
}
function _strlen($string) {
if ($this->is_overloaded) {
return mb_strlen($string,'ascii');
} else {
return strlen($string);
}
}
function read($bytes) {
$data = $this->_substr($this->_str, $this->_pos, $bytes);
$data = $this->substr($this->_str, $this->_pos, $bytes);
$this->_pos += $bytes;
if ($this->_strlen($this->_str) < $this->_pos) $this->_pos = $this->_strlen($this->_str);
if ($this->strlen($this->_str) < $this->_pos) $this->_pos = $this->strlen($this->_str);
return $data;
}
function seekto($pos) {
$this->_pos = $pos;
if ($this->_strlen($this->_str) < $this->_pos) $this->_pos = $this->_strlen($this->_str);
return $this->_pos;
}
function pos() {
if ($this->strlen($this->_str) < $this->_pos) $this->_pos = $this->strlen($this->_str);
return $this->_pos;
}
function length() {
return $this->_strlen($this->_str);
return $this->strlen($this->_str);
}
function read_all() {
return $this->substr($this->_str, $this->_pos, $this->strlen($this->_str));
}
}
endif;
@ -81,61 +199,11 @@ endif;
if ( !class_exists( 'POMO_CachedIntFileReader' ) ):
/**
* Allows reading integers from a file.
* Reads the contents of the file in the beginning.
*/
class POMO_CachedIntFileReader extends POMO_CachedFileReader {
var $endian = 'little';
/**
* Opens a file and caches it.
*
* @param $filename string name of the file to be opened
* @param $endian string endianness of the words in the file, allowed
* values are 'little' or 'big'. Default value is 'little'
*/
function POMO_CachedIntFileReader($filename, $endian = 'little') {
$this->endian = $endian;
function POMO_CachedIntFileReader($filename) {
parent::POMO_CachedFileReader($filename);
}
/**
* Sets the endianness of the file.
*
* @param $endian string 'big' or 'little'
*/
function setEndian($endian) {
$this->endian = $endian;
}
/**
* Reads a 32bit Integer from the Stream
*
* @return mixed The integer, corresponding to the next 32 bits from
* the stream of false if there are not enough bytes or on error
*/
function readint32() {
$bytes = $this->read(4);
if (4 != $this->_strlen($bytes))
return false;
$endian_letter = ('big' == $this->endian)? 'N' : 'V';
$int = unpack($endian_letter, $bytes);
return array_shift($int);
}
/**
* Reads an array of 32-bit Integers from the Stream
*
* @param integer count How many elements should be read
* @return mixed Array of integers or false if there isn't
* enough data or on error
*/
function readint32array($count) {
$bytes = $this->read(4 * $count);
if (4*$count != $this->_strlen($bytes))
return false;
$endian_letter = ('big' == $this->endian)? 'N' : 'V';
return unpack($endian_letter.$count, $bytes);
}
}
endif;