build : SpreadsheetReader.php
<?php
/**
* Main class for spreadsheet reading
*
* @version 0.5.10
* @author Martins Pilsetnieks
*/
class SpreadsheetReader implements SeekableIterator, Countable
{
const TYPE_XLSX = "XLSX";
const TYPE_XLS = "XLS";
const TYPE_CSV = "CSV";
const TYPE_ODS = "ODS";
private $Options = array(
"Delimiter" => "",
"Enclosure" => """
);
/**
* @var int Current row in the file
*/
private $Index = 0;
/**
* @var SpreadsheetReader_* Handle for the reader object
*/
private $Handle = array();
/**
* @var TYPE_* Type of the contained spreadsheet
*/
private $Type = false;
/**
* @param string Path to file
* @param string Original filename (in case of an uploaded file), used to determine file type, optional
* @param string MIME type from an upload, used to determine file type, optional
*/
public function __construct($Filepath, $OriginalFilename = false, $MimeType = false)
{
if (!is_readable($Filepath))
{
throw new Exception("SpreadsheetReader: File (".$Filepath.") not readable");
}
// To avoid timezone warnings and exceptions for formatting dates retrieved from files
$DefaultTZ = @date_default_timezone_get();
if ($DefaultTZ)
{
date_default_timezone_set($DefaultTZ);
}
// Checking the other parameters for correctness
// This should be a check for string but we"re lenient
if (!empty($OriginalFilename) && !is_scalar($OriginalFilename))
{
throw new Exception("SpreadsheetReader: Original file (2nd parameter) path is not a string or a scalar value.");
}
if (!empty($MimeType) && !is_scalar($MimeType))
{
throw new Exception("SpreadsheetReader: Mime type (3nd parameter) path is not a string or a scalar value.");
}
// 1. Determine type
if (!$OriginalFilename)
{
$OriginalFilename = $Filepath;
}
$Extension = strtolower(pathinfo($OriginalFilename, PATHINFO_EXTENSION));
switch ($MimeType)
{
case "text/csv":
case "text/comma-separated-values":
case "text/plain":
$this -> Type = self::TYPE_CSV;
break;
case "application/vnd.ms-excel":
case "application/msexcel":
case "application/x-msexcel":
case "application/x-ms-excel":
case "application/vnd.ms-excel":
case "application/x-excel":
case "application/x-dos_ms_excel":
case "application/xls":
case "application/xlt":
case "application/x-xls":
// Excel does weird stuff
if (in_array($Extension, array("csv", "tsv", "txt")))
{
$this -> Type = self::TYPE_CSV;
}
else
{
$this -> Type = self::TYPE_XLS;
}
break;
case "application/vnd.oasis.opendocument.spreadsheet":
case "application/vnd.oasis.opendocument.spreadsheet-template":
$this -> Type = self::TYPE_ODS;
break;
case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
case "application/vnd.openxmlformats-officedocument.spreadsheetml.template":
case "application/xlsx":
case "application/xltx":
$this -> Type = self::TYPE_XLSX;
break;
case "application/xml":
// Excel 2004 xml format uses this
break;
}
if (!$this -> Type)
{
switch ($Extension)
{
case "xlsx":
case "xltx": // XLSX template
case "xlsm": // Macro-enabled XLSX
case "xltm": // Macro-enabled XLSX template
$this -> Type = self::TYPE_XLSX;
break;
case "xls":
case "xlt":
$this -> Type = self::TYPE_XLS;
break;
case "ods":
case "odt":
$this -> Type = self::TYPE_ODS;
break;
default:
$this -> Type = self::TYPE_CSV;
break;
}
}
// Pre-checking XLS files, in case they are renamed CSV or XLSX files
if ($this -> Type == self::TYPE_XLS)
{
self::Load(self::TYPE_XLS);
$this -> Handle = new SpreadsheetReader_XLS($Filepath);
if ($this -> Handle -> Error)
{
$this -> Handle -> __destruct();
if (is_resource($ZipHandle = zip_open($Filepath)))
{
$this -> Type = self::TYPE_XLSX;
zip_close($ZipHandle);
}
else
{
$this -> Type = self::TYPE_CSV;
}
}
}
// 2. Create handle
switch ($this -> Type)
{
case self::TYPE_XLSX:
self::Load(self::TYPE_XLSX);
$this -> Handle = new SpreadsheetReader_XLSX($Filepath);
break;
case self::TYPE_CSV:
self::Load(self::TYPE_CSV);
$this -> Handle = new SpreadsheetReader_CSV($Filepath, $this -> Options);
break;
case self::TYPE_XLS:
// Everything already happens above
break;
case self::TYPE_ODS:
self::Load(self::TYPE_ODS);
$this -> Handle = new SpreadsheetReader_ODS($Filepath, $this -> Options);
break;
}
}
/**
* Gets information about separate sheets in the given file
*
* @return array Associative array where key is sheet index and value is sheet name
*/
public function Sheets()
{
return $this -> Handle -> Sheets();
}
/**
* Changes the current sheet to another from the file.
* Note that changing the sheet will rewind the file to the beginning, even if
* the current sheet index is provided.
*
* @param int Sheet index
*
* @return bool True if sheet could be changed to the specified one,
* false if not (for example, if incorrect index was provided.
*/
public function ChangeSheet($Index)
{
return $this -> Handle -> ChangeSheet($Index);
}
/**
* Autoloads the required class for the particular spreadsheet type
*
* @param TYPE_* Spreadsheet type, one of TYPE_* constants of this class
*/
private static function Load($Type)
{
if (!in_array($Type, array(self::TYPE_XLSX, self::TYPE_XLS, self::TYPE_CSV, self::TYPE_ODS)))
{
throw new Exception("SpreadsheetReader: Invalid type (".$Type.")");
}
// 2nd parameter is to prevent autoloading for the class.
// If autoload works, the require line is unnecessary, if it doesn"t, it ends badly.
if (!class_exists("SpreadsheetReader_".$Type, false))
{
require(dirname(__FILE__).DIRECTORY_SEPARATOR."SpreadsheetReader_".$Type.".php");
}
}
// !Iterator interface methods
/**
* Rewind the Iterator to the first element.
* Similar to the reset() function for arrays in PHP
*/
public function rewind()
{
$this -> Index = 0;
if ($this -> Handle)
{
$this -> Handle -> rewind();
}
}
/**
* Return the current element.
* Similar to the current() function for arrays in PHP
*
* @return mixed current element from the collection
*/
public function current()
{
if ($this -> Handle)
{
return $this -> Handle -> current();
}
return null;
}
/**
* Move forward to next element.
* Similar to the next() function for arrays in PHP
*/
public function next()
{
if ($this -> Handle)
{
$this -> Index++;
return $this -> Handle -> next();
}
return null;
}
/**
* Return the identifying key of the current element.
* Similar to the key() function for arrays in PHP
*
* @return mixed either an integer or a string
*/
public function key()
{
if ($this -> Handle)
{
return $this -> Handle -> key();
}
return null;
}
/**
* Check if there is a current element after calls to rewind() or next().
* Used to check if we"ve iterated to the end of the collection
*
* @return boolean FALSE if there"s nothing more to iterate over
*/
public function valid()
{
if ($this -> Handle)
{
return $this -> Handle -> valid();
}
return false;
}
// !Countable interface method
public function count()
{
if ($this -> Handle)
{
return $this -> Handle -> count();
}
return 0;
}
/**
* Method for SeekableIterator interface. Takes a posiiton and traverses the file to that position
* The value can be retrieved with a `current()` call afterwards.
*
* @param int Position in file
*/
public function seek($Position)
{
if (!$this -> Handle)
{
throw new OutOfBoundsException("SpreadsheetReader: No file opened");
}
$CurrentIndex = $this -> Handle -> key();
if ($CurrentIndex != $Position)
{
if ($Position < $CurrentIndex || is_null($CurrentIndex) || $Position == 0)
{
$this -> rewind();
}
while ($this -> Handle -> valid() && ($Position > $this -> Handle -> key()))
{
$this -> Handle -> next();
}
if (!$this -> Handle -> valid())
{
throw new OutOfBoundsException("SpreadsheetError: Position ".$Position." not found");
}
}
return null;
}
}
?>
THEN BUILD LIBRARY : SpreadsheetReader_XLS.php
<?php
/**
- Class for parsing XLS files *
-
@author Martins Pilsetnieks
/
class SpreadsheetReader_XLS implements Iterator, Countable
{
/*
* @var array Options array, pre-populated with the default values.
*/
private $Options = array(
);/** * @var resource File handle */ private $Handle = false; private $Index = 0; private $Error = false; /** * @var array Sheet information */ private $Sheets = false; private $SheetIndexes = array(); /** * @var int Current sheet index */ private $CurrentSheet = 0; /** * @var array Content of the current row */ private $CurrentRow = array(); /** * @var int Column count in the sheet */ private $ColumnCount = 0; /** * @var int Row count in the sheet */ private $RowCount = 0; /** * @var array Template to use for empty rows. Retrieved rows are merged * with this so that empty cells are added, too */ private $EmptyRow = array(); /** * @param string Path to file * @param array Options */ public function __construct($Filepath, array $Options = null) { if (!is_readable($Filepath)) { throw new Exception("SpreadsheetReader_XLS: File not readable (".$Filepath.")"); } if (!class_exists("Spreadsheet_Excel_Reader")) { throw new Exception("SpreadsheetReader_XLS: Spreadsheet_Excel_Reader class not available"); } $this -> Handle = new Spreadsheet_Excel_Reader($Filepath, false, "UTF-8"); if (function_exists("mb_convert_encoding")) { $this -> Handle -> setUTFEncoder("mb"); } if (empty($this -> Handle -> sheets)) { $this -> Error = true; return null; } $this -> ChangeSheet(0); } public function __destruct() { unset($this -> Handle); } /** * Retrieves an array with information about sheets in the current file * * @return array List of sheets (key is sheet index, value is name) */ public function Sheets() { if ($this -> Sheets === false) { $this -> Sheets = array(); $this -> SheetIndexes = array_keys($this -> Handle -> sheets); foreach ($this -> SheetIndexes as $SheetIndex) { $this -> Sheets[] = $this -> Handle -> boundsheets[$SheetIndex]["name"]; } } return $this -> Sheets; } /** * Changes the current sheet in the file to another * * @param int Sheet index * * @return bool True if sheet was successfully changed, false otherwise. */ public function ChangeSheet($Index) { $Index = (int)$Index; $Sheets = $this -> Sheets(); if (isset($this -> Sheets[$Index])) { $this -> rewind(); $this -> CurrentSheet = $this -> SheetIndexes[$Index]; $this -> ColumnCount = $this -> Handle -> sheets[$this -> CurrentSheet]["numCols"]; $this -> RowCount = $this -> Handle -> sheets[$this -> CurrentSheet]["numRows"]; // For the case when Spreadsheet_Excel_Reader doesn"t have the row count set correctly. if (!$this -> RowCount && count($this -> Handle -> sheets[$this -> CurrentSheet]["cells"])) { end($this -> Handle -> sheets[$this -> CurrentSheet]["cells"]); $this -> RowCount = (int)key($this -> Handle -> sheets[$this -> CurrentSheet]["cells"]); } if ($this -> ColumnCount) { $this -> EmptyRow = array_fill(1, $this -> ColumnCount, ""); } else { $this -> EmptyRow = array(); } } return false; } public function __get($Name) { switch ($Name) { case "Error": return $this -> Error; break; } return null; } // !Iterator interface methods /** * Rewind the Iterator to the first element. * Similar to the reset() function for arrays in PHP */ public function rewind() { $this -> Index = 0; } /** * Return the current element. * Similar to the current() function for arrays in PHP * * @return mixed current element from the collection */ public function current() { if ($this -> Index == 0) { $this -> next(); } return $this -> CurrentRow; } /** * Move forward to next element. * Similar to the next() function for arrays in PHP */ public function next() { // Internal counter is advanced here instead of the if statement // because apparently it"s fully possible that an empty row will not be // present at all $this -> Index++; if ($this -> Error) { return array(); } elseif (isset($this -> Handle -> sheets[$this -> CurrentSheet]["cells"][$this -> Index])) { $this -> CurrentRow = $this -> Handle -> sheets[$this -> CurrentSheet]["cells"][$this -> Index]; if (!$this -> CurrentRow) { return array(); } $this -> CurrentRow = $this -> CurrentRow + $this -> EmptyRow; ksort($this -> CurrentRow); $this -> CurrentRow = array_values($this -> CurrentRow); return $this -> CurrentRow; } else { $this -> CurrentRow = $this -> EmptyRow; return $this -> CurrentRow; } } /** * Return the identifying key of the current element. * Similar to the key() function for arrays in PHP * * @return mixed either an integer or a string */ public function key() { return $this -> Index; } /** * Check if there is a current element after calls to rewind() or next(). * Used to check if we"ve iterated to the end of the collection * * @return boolean FALSE if there"s nothing more to iterate over */ public function valid() { if ($this -> Error) { return false; } return ($this -> Index <= $this -> RowCount); } // !Countable interface method /** * Ostensibly should return the count of the contained items but this just returns the number * of rows read so far. It"s not really correct but at least coherent. */ public function count() { if ($this -> Error) { return 0; } return $this -> RowCount; }
}
?>
THEN BUILD XLSX LIBRARY : SpreadsheetReader_XLSX.php
<?php
/**
- Class for parsing XLSX files specifically *
-
@author Martins Pilsetnieks
*/
class SpreadsheetReader_XLSX implements Iterator, Countable
{
const CELL_TYPE_BOOL = "b";
const CELL_TYPE_NUMBER = "n";
const CELL_TYPE_ERROR = "e";
const CELL_TYPE_SHARED_STR = "s";
const CELL_TYPE_STR = "str";
const CELL_TYPE_INLINE_STR = "inlineStr";/** * Number of shared strings that can be reasonably cached, i.e., that aren"t read from file but stored in memory. * If the total number of shared strings is higher than this, caching is not used. * If this value is null, shared strings are cached regardless of amount. * With large shared string caches there are huge performance gains, however a lot of memory could be used which * can be a problem, especially on shared hosting. */ const SHARED_STRING_CACHE_LIMIT = 50000; private $Options = array( "TempDir" => "", "ReturnDateTimeObjects" => false ); private static $RuntimeInfo = array( "GMPSupported" => false ); private $Valid = false; /** * @var SpreadsheetReader_* Handle for the reader object */ private $Handle = false; // Worksheet file /** * @var string Path to the worksheet XML file */ private $WorksheetPath = false; /** * @var XMLReader XML reader object for the worksheet XML file */ private $Worksheet = false; // Shared strings file /** * @var string Path to shared strings XML file */ private $SharedStringsPath = false; /** * @var XMLReader XML reader object for the shared strings XML file */ private $SharedStrings = false; /** * @var array Shared strings cache, if the number of shared strings is low enough */ private $SharedStringCache = array(); // Workbook data /** * @var SimpleXMLElement XML object for the workbook XML file */ private $WorkbookXML = false; // Style data /** * @var SimpleXMLElement XML object for the styles XML file */ private $StylesXML = false; /** * @var array Container for cell value style data */ private $Styles = array(); private $TempDir = ""; private $TempFiles = array(); private $CurrentRow = false; // Runtime parsing data /** * @var int Current row in the file */ private $Index = 0; /** * @var array Data about separate sheets in the file */ private $Sheets = false; private $SharedStringCount = 0; private $SharedStringIndex = 0; private $LastSharedStringValue = null; private $RowOpen = false; private $SSOpen = false; private $SSForwarded = false; private static $BuiltinFormats = array( 0 => "", 1 => "0", 2 => "0.00", 3 => "#,##0", 4 => "#,##0.00", 9 => "0%", 10 => "0.00%", 11 => "0.00E+00", 12 => "# ?/?", 13 => "# ??/??", 14 => "mm-dd-yy", 15 => "d-mmm-yy", 16 => "d-mmm", 17 => "mmm-yy", 18 => "h:mm AM/PM", 19 => "h:mm:ss AM/PM", 20 => "h:mm", 21 => "h:mm:ss", 22 => "m/d/yy h:mm", 37 => "#,##0 ;(#,##0)", 38 => "#,##0 ;[Red](#,##0)", 39 => "#,##0.00;(#,##0.00)", 40 => "#,##0.00;[Red](#,##0.00)", 45 => "mm:ss", 46 => "[h]:mm:ss", 47 => "mmss.0", 48 => "##0.0E+0", 49 => "@", // CHT & CHS 27 => "[$-404]e/m/d", 30 => "m/d/yy", 36 => "[$-404]e/m/d", 50 => "[$-404]e/m/d", 57 => "[$-404]e/m/d", // THA 59 => "t0", 60 => "t0.00", 61 =>"t#,##0", 62 => "t#,##0.00", 67 => "t0%", 68 => "t0.00%", 69 => "t# ?/?", 70 => "t# ??/??" ); private $Formats = array(); private static $DateReplacements = array( "All" => array( "\\" => "", "am/pm" => "A", "yyyy" => "Y", "yy" => "y", "mmmmm" => "M", "mmmm" => "F", "mmm" => "M", ":mm" => ":i", "mm" => "m", "m" => "n", "dddd" => "l", "ddd" => "D", "dd" => "d", "d" => "j", "ss" => "s", ".s" => "" ), "24H" => array( "hh" => "H", "h" => "G" ), "12H" => array( "hh" => "h", "h" => "G" ) ); private static $BaseDate = false; private static $DecimalSeparator = "."; private static $ThousandSeparator = ""; private static $CurrencyCode = ""; /** * @var array Cache for already processed format strings */ private $ParsedFormatCache = array(); /** * @param string Path to file * @param array Options: * TempDir => string Temporary directory path * ReturnDateTimeObjects => bool True => dates and times will be returned as PHP DateTime objects, false => as strings */ public function __construct($Filepath, array $Options = null) { if (!is_readable($Filepath)) { throw new Exception("SpreadsheetReader_XLSX: File not readable (".$Filepath.")"); } $this -> TempDir = isset($Options["TempDir"]) && is_writable($Options["TempDir"]) ? $Options["TempDir"] : sys_get_temp_dir(); $this -> TempDir = rtrim($this -> TempDir, DIRECTORY_SEPARATOR); $this -> TempDir = $this -> TempDir.DIRECTORY_SEPARATOR.uniqid().DIRECTORY_SEPARATOR; $Zip = new ZipArchive; $Status = $Zip -> open($Filepath); if ($Status !== true) { throw new Exception("SpreadsheetReader_XLSX: File not readable (".$Filepath.") (Error ".$Status.")"); } // Getting the general workbook information if ($Zip -> locateName("xl/workbook.xml") !== false) { $this -> WorkbookXML = new SimpleXMLElement($Zip -> getFromName("xl/workbook.xml")); } // Extracting the XMLs from the XLSX zip file if ($Zip -> locateName("xl/sharedStrings.xml") !== false) { $this -> SharedStringsPath = $this -> TempDir."xl".DIRECTORY_SEPARATOR."sharedStrings.xml"; $Zip -> extractTo($this -> TempDir, "xl/sharedStrings.xml"); $this -> TempFiles[] = $this -> TempDir."xl".DIRECTORY_SEPARATOR."sharedStrings.xml"; if (is_readable($this -> SharedStringsPath)) { $this -> SharedStrings = new XMLReader; $this -> SharedStrings -> open($this -> SharedStringsPath); $this -> PrepareSharedStringCache(); } } $Sheets = $this -> Sheets(); foreach ($this -> Sheets as $Index => $Name) { if ($Zip -> locateName("xl/worksheets/sheet".$Index.".xml") !== false) { $Zip -> extractTo($this -> TempDir, "xl/worksheets/sheet".$Index.".xml"); $this -> TempFiles[] = $this -> TempDir."xl".DIRECTORY_SEPARATOR."worksheets".DIRECTORY_SEPARATOR."sheet".$Index.".xml"; } } $this -> ChangeSheet(0); // If worksheet is present and is OK, parse the styles already if ($Zip -> locateName("xl/styles.xml") !== false) { $this -> StylesXML = new SimpleXMLElement($Zip -> getFromName("xl/styles.xml")); if ($this -> StylesXML && $this -> StylesXML -> cellXfs && $this -> StylesXML -> cellXfs -> xf) { foreach ($this -> StylesXML -> cellXfs -> xf as $Index => $XF) { // Format #0 is a special case - it is the "General" format that is applied regardless of applyNumberFormat if ($XF -> attributes() -> applyNumberFormat || (0 == (int)$XF -> attributes() -> numFmtId)) { $FormatId = (int)$XF -> attributes() -> numFmtId; // If format ID >= 164, it is a custom format and should be read from styleSheet\numFmts $this -> Styles[] = $FormatId; } else { // 0 for "General" format $this -> Styles[] = 0; } } } if ($this -> StylesXML -> numFmts && $this -> StylesXML -> numFmts -> numFmt) { foreach ($this -> StylesXML -> numFmts -> numFmt as $Index => $NumFmt) { $this -> Formats[(int)$NumFmt -> attributes() -> numFmtId] = (string)$NumFmt -> attributes() -> formatCode; } } unset($this -> StylesXML); } $Zip -> close(); // Setting base date if (!self::$BaseDate) { self::$BaseDate = new DateTime; self::$BaseDate -> setTimezone(new DateTimeZone("UTC")); self::$BaseDate -> setDate(1900, 1, 0); self::$BaseDate -> setTime(0, 0, 0); } // Decimal and thousand separators if (!self::$DecimalSeparator && !self::$ThousandSeparator && !self::$CurrencyCode) { $Locale = localeconv(); self::$DecimalSeparator = $Locale["decimal_point"]; self::$ThousandSeparator = $Locale["thousands_sep"]; self::$CurrencyCode = $Locale["int_curr_symbol"]; } if (function_exists("gmp_gcd")) { self::$RuntimeInfo["GMPSupported"] = true; } } /** * Destructor, destroys all that remains (closes and deletes temp files) */ public function __destruct() { foreach ($this -> TempFiles as $TempFile) { @unlink($TempFile); } // Better safe than sorry - shouldn"t try deleting "." or "/", or "..". if (strlen($this -> TempDir) > 2) { @rmdir($this -> TempDir."xl".DIRECTORY_SEPARATOR."worksheets"); @rmdir($this -> TempDir."xl"); @rmdir($this -> TempDir); } if ($this -> Worksheet && $this -> Worksheet instanceof XMLReader) { $this -> Worksheet -> close(); unset($this -> Worksheet); } unset($this -> WorksheetPath); if ($this -> SharedStrings && $this -> SharedStrings instanceof XMLReader) { $this -> SharedStrings -> close(); unset($this -> SharedStrings); } unset($this -> SharedStringsPath); if (isset($this -> StylesXML)) { unset($this -> StylesXML); } if ($this -> WorkbookXML) { unset($this -> WorkbookXML); } } /** * Retrieves an array with information about sheets in the current file * * @return array List of sheets (key is sheet index, value is name) */ public function Sheets() { if ($this -> Sheets === false) { $this -> Sheets = array(); foreach ($this -> WorkbookXML -> sheets -> sheet as $Index => $Sheet) { $Attributes = $Sheet -> attributes("r", true); foreach ($Attributes as $Name => $Value) { if ($Name == "id") { $SheetID = (int)str_replace("rId", "", (string)$Value); break; } } $this -> Sheets[$SheetID] = (string)$Sheet["name"]; } ksort($this -> Sheets); } return array_values($this -> Sheets); } /** * Changes the current sheet in the file to another * * @param int Sheet index * * @return bool True if sheet was successfully changed, false otherwise. */ public function ChangeSheet($Index) { $RealSheetIndex = false; $Sheets = $this -> Sheets(); if (isset($Sheets[$Index])) { $SheetIndexes = array_keys($this -> Sheets); $RealSheetIndex = $SheetIndexes[$Index]; } $TempWorksheetPath = $this -> TempDir."xl/worksheets/sheet".$RealSheetIndex.".xml"; if ($RealSheetIndex !== false && is_readable($TempWorksheetPath)) { $this -> WorksheetPath = $TempWorksheetPath; $this -> rewind(); return true; } return false; } /** * Creating shared string cache if the number of shared strings is acceptably low (or there is no limit on the amount */ private function PrepareSharedStringCache() { while ($this -> SharedStrings -> read()) { if ($this -> SharedStrings -> name == "sst") { $this -> SharedStringCount = $this -> SharedStrings -> getAttribute("count"); break; } } if (!$this -> SharedStringCount || (self::SHARED_STRING_CACHE_LIMIT < $this -> SharedStringCount && self::SHARED_STRING_CACHE_LIMIT !== null)) { return false; } $CacheIndex = 0; $CacheValue = ""; while ($this -> SharedStrings -> read()) { switch ($this -> SharedStrings -> name) { case "si": if ($this -> SharedStrings -> nodeType == XMLReader::END_ELEMENT) { $this -> SharedStringCache[$CacheIndex] = $CacheValue; $CacheIndex++; $CacheValue = ""; } break; case "t": if ($this -> SharedStrings -> nodeType == XMLReader::END_ELEMENT) { continue; } $CacheValue .= $this -> SharedStrings -> readString(); break; } } $this -> SharedStrings -> close(); return true; } /** * Retrieves a shared string value by its index * * @param int Shared string index * * @return string Value */ private function GetSharedString($Index) { if ((self::SHARED_STRING_CACHE_LIMIT === null || self::SHARED_STRING_CACHE_LIMIT > 0) && !empty($this -> SharedStringCache)) { if (isset($this -> SharedStringCache[$Index])) { return $this -> SharedStringCache[$Index]; } else { return ""; } } // If the desired index is before the current, rewind the XML if ($this -> SharedStringIndex > $Index) { $this -> SSOpen = false; $this -> SharedStrings -> close(); $this -> SharedStrings -> open($this -> SharedStringsPath); $this -> SharedStringIndex = 0; $this -> LastSharedStringValue = null; $this -> SSForwarded = false; } // Finding the unique string count (if not already read) if ($this -> SharedStringIndex == 0 && !$this -> SharedStringCount) { while ($this -> SharedStrings -> read()) { if ($this -> SharedStrings -> name == "sst") { $this -> SharedStringCount = $this -> SharedStrings -> getAttribute("uniqueCount"); break; } } } // If index of the desired string is larger than possible, don"t even bother. if ($this -> SharedStringCount && ($Index >= $this -> SharedStringCount)) { return ""; } // If an index with the same value as the last already fetched is requested // (any further traversing the tree would get us further away from the node) if (($Index == $this -> SharedStringIndex) && ($this -> LastSharedStringValue !== null)) { return $this -> LastSharedStringValue; } // Find the correct <si> node with the desired index while ($this -> SharedStringIndex <= $Index) { // SSForwarded is set further to avoid double reading in case nodes are skipped. if ($this -> SSForwarded) { $this -> SSForwarded = false; } else { $ReadStatus = $this -> SharedStrings -> read(); if (!$ReadStatus) { break; } } if ($this -> SharedStrings -> name == "si") { if ($this -> SharedStrings -> nodeType == XMLReader::END_ELEMENT) { $this -> SSOpen = false; $this -> SharedStringIndex++; } else { $this -> SSOpen = true; if ($this -> SharedStringIndex < $Index) { $this -> SSOpen = false; $this -> SharedStrings -> next("si"); $this -> SSForwarded = true; $this -> SharedStringIndex++; continue; } else { break; } } } } $Value = ""; // Extract the value from the shared string if ($this -> SSOpen && ($this -> SharedStringIndex == $Index)) { while ($this -> SharedStrings -> read()) { switch ($this -> SharedStrings -> name) { case "t": if ($this -> SharedStrings -> nodeType == XMLReader::END_ELEMENT) { continue; } $Value .= $this -> SharedStrings -> readString(); break; case "si": if ($this -> SharedStrings -> nodeType == XMLReader::END_ELEMENT) { $this -> SSOpen = false; $this -> SSForwarded = true; break 2; } break; } } } if ($Value) { $this -> LastSharedStringValue = $Value; } return $Value; } /** * Formats the value according to the index * * @param string Cell value * @param int Format index * * @return string Formatted cell value */ private function FormatValue($Value, $Index) { if (!is_numeric($Value)) { return $Value; } if (isset($this -> Styles[$Index]) && ($this -> Styles[$Index] !== false)) { $Index = $this -> Styles[$Index]; } else { return $Value; } // A special case for the "General" format if ($Index == 0) { return $this -> GeneralFormat($Value); } $Format = array(); if (isset($this -> ParsedFormatCache[$Index])) { $Format = $this -> ParsedFormatCache[$Index]; } if (!$Format) { $Format = array( "Code" => false, "Type" => false, "Scale" => 1, "Thousands" => false, "Currency" => false ); if (isset(self::$BuiltinFormats[$Index])) { $Format["Code"] = self::$BuiltinFormats[$Index]; } elseif (isset($this -> Formats[$Index])) { $Format["Code"] = $this -> Formats[$Index]; } // Format code found, now parsing the format if ($Format["Code"]) { $Sections = explode(";", $Format["Code"]); $Format["Code"] = $Sections[0]; switch (count($Sections)) { case 2: if ($Value < 0) { $Format["Code"] = $Sections[1]; } break; case 3: case 4: if ($Value < 0) { $Format["Code"] = $Sections[1]; } elseif ($Value == 0) { $Format["Code"] = $Sections[2]; } break; } } // Stripping colors $Format["Code"] = trim(preg_replace("{^\[[[:alpha:]]+\]}i", "", $Format["Code"])); // Percentages if (substr($Format["Code"], -1) == "%") { $Format["Type"] = "Percentage"; } elseif (preg_match("{^(\[\$[[:alpha:]]*-[0-9A-F]*\])*[hmsdy]}i", $Format["Code"])) { $Format["Type"] = "DateTime"; $Format["Code"] = trim(preg_replace("{^(\[\$[[:alpha:]]*-[0-9A-F]*\])}i", "", $Format["Code"])); $Format["Code"] = strtolower($Format["Code"]); $Format["Code"] = strtr($Format["Code"], self::$DateReplacements["All"]); if (strpos($Format["Code"], "A") === false) { $Format["Code"] = strtr($Format["Code"], self::$DateReplacements["24H"]); } else { $Format["Code"] = strtr($Format["Code"], self::$DateReplacements["12H"]); } } elseif ($Format["Code"] == "[$EUR ]#,##0.00_-") { $Format["Type"] = "Euro"; } else { // Removing skipped characters $Format["Code"] = preg_replace("{_.}", "", $Format["Code"]); // Removing unnecessary escaping $Format["Code"] = preg_replace("{\\\\}", "", $Format["Code"]); // Removing string quotes $Format["Code"] = str_replace(array(""", "*"), "", $Format["Code"]); // Removing thousands separator if (strpos($Format["Code"], "0,0") !== false || strpos($Format["Code"], "#,#") !== false) { $Format["Thousands"] = true; } $Format["Code"] = str_replace(array("0,0", "#,#"), array("00", "##"), $Format["Code"]); // Scaling (Commas indicate the power) $Scale = 1; $Matches = array(); if (preg_match("{(0|#)(,+)}", $Format["Code"], $Matches)) { $Scale = pow(1000, strlen($Matches[2])); // Removing the commas $Format["Code"] = preg_replace(array("{0,+}", "{#,+}"), array("0", "#"), $Format["Code"]); } $Format["Scale"] = $Scale; if (preg_match("{#?.*\?\/\?}", $Format["Code"])) { $Format["Type"] = "Fraction"; } else { $Format["Code"] = str_replace("#", "", $Format["Code"]); $Matches = array(); if (preg_match("{(0+)(\.?)(0*)}", preg_replace("{\[[^\]]+\]}", "", $Format["Code"]), $Matches)) { $Integer = $Matches[1]; $DecimalPoint = $Matches[2]; $Decimals = $Matches[3]; $Format["MinWidth"] = strlen($Integer) + strlen($DecimalPoint) + strlen($Decimals); $Format["Decimals"] = $Decimals; $Format["Precision"] = strlen($Format["Decimals"]); $Format["Pattern"] = "%0".$Format["MinWidth"].".".$Format["Precision"]."f"; } } $Matches = array(); if (preg_match("{\[\$(.*)\]}u", $Format["Code"], $Matches)) { $CurrFormat = $Matches[0]; $CurrCode = $Matches[1]; $CurrCode = explode("-", $CurrCode); if ($CurrCode) { $CurrCode = $CurrCode[0]; } if (!$CurrCode) { $CurrCode = self::$CurrencyCode; } $Format["Currency"] = $CurrCode; } $Format["Code"] = trim($Format["Code"]); } $this -> ParsedFormatCache[$Index] = $Format; } // Applying format to value if ($Format) { if ($Format["Code"] == "@") { return (string)$Value; } // Percentages elseif ($Format["Type"] == "Percentage") { if ($Format["Code"] === "0%") { $Value = round(100 * $Value, 0)."%"; } else { $Value = sprintf("%.2f%%", round(100 * $Value, 2)); } } // Dates and times elseif ($Format["Type"] == "DateTime") { $Days = (int)$Value; // Correcting for Feb 29, 1900 if ($Days > 60) { $Days--; } // At this point time is a fraction of a day $Time = ($Value - (int)$Value); $Seconds = 0; if ($Time) { // Here time is converted to seconds // Some loss of precision will occur $Seconds = (int)($Time * 86400); } $Value = clone self::$BaseDate; $Value -> add(new DateInterval("P".$Days."D".($Seconds ? "T".$Seconds."S" : ""))); if (!$this -> Options["ReturnDateTimeObjects"]) { $Value = $Value -> format($Format["Code"]); } else { // A DateTime object is returned } } elseif ($Format["Type"] == "Euro") { $Value = "EUR ".sprintf("%1.2f", $Value); } else { // Fractional numbers if ($Format["Type"] == "Fraction" && ($Value != (int)$Value)) { $Integer = floor(abs($Value)); $Decimal = fmod(abs($Value), 1); // Removing the integer part and decimal point $Decimal *= pow(10, strlen($Decimal) - 2); $DecimalDivisor = pow(10, strlen($Decimal)); if (self::$RuntimeInfo["GMPSupported"]) { $GCD = gmp_strval(gmp_gcd($Decimal, $DecimalDivisor)); } else { $GCD = self::GCD($Decimal, $DecimalDivisor); } $AdjDecimal = $DecimalPart/$GCD; $AdjDecimalDivisor = $DecimalDivisor/$GCD; if ( strpos($Format["Code"], "0") !== false || strpos($Format["Code"], "#") !== false || substr($Format["Code"], 0, 3) == "? ?" ) { // The integer part is shown separately apart from the fraction $Value = ($Value < 0 ? "-" : ""). $Integer ? $Integer." " : "". $AdjDecimal."/". $AdjDecimalDivisor; } else { // The fraction includes the integer part $AdjDecimal += $Integer * $AdjDecimalDivisor; $Value = ($Value < 0 ? "-" : ""). $AdjDecimal."/". $AdjDecimalDivisor; } } else { // Scaling $Value = $Value / $Format["Scale"]; if (!empty($Format["MinWidth"]) && $Format["Decimals"]) { if ($Format["Thousands"]) { $Value = number_format($Value, $Format["Precision"], self::$DecimalSeparator, self::$ThousandSeparator); } else { $Value = sprintf($Format["Pattern"], $Value); } $Value = preg_replace("{(0+)(\.?)(0*)}", $Value, $Format["Code"]); } } // Currency/Accounting if ($Format["Currency"]) { $Value = preg_replace("", $Format["Currency"], $Value); } } } return $Value; } /** * Attempts to approximate Excel"s "general" format. * * @param mixed Value * * @return mixed Result */ public function GeneralFormat($Value) { // Numeric format if (is_numeric($Value)) { $Value = (float)$Value; } return $Value; } // !Iterator interface methods /** * Rewind the Iterator to the first element. * Similar to the reset() function for arrays in PHP */ public function rewind() { // Removed the check whether $this -> Index == 0 otherwise ChangeSheet doesn"t work properly // If the worksheet was already iterated, XML file is reopened. // Otherwise it should be at the beginning anyway if ($this -> Worksheet instanceof XMLReader) { $this -> Worksheet -> close(); } else { $this -> Worksheet = new XMLReader; } $this -> Worksheet -> open($this -> WorksheetPath); $this -> Valid = true; $this -> RowOpen = false; $this -> CurrentRow = false; $this -> Index = 0; } /** * Return the current element. * Similar to the current() function for arrays in PHP * * @return mixed current element from the collection */ public function current() { if ($this -> Index == 0 && $this -> CurrentRow === false) { $this -> next(); $this -> Index--; } return $this -> CurrentRow; } /** * Move forward to next element. * Similar to the next() function for arrays in PHP */ public function next() { $this -> Index++; $this -> CurrentRow = array(); if (!$this -> RowOpen) { while ($this -> Valid = $this -> Worksheet -> read()) { if ($this -> Worksheet -> name == "row") { // Getting the row spanning area (stored as e.g., 1:12) // so that the last cells will be present, even if empty $RowSpans = $this -> Worksheet -> getAttribute("spans"); if ($RowSpans) { $RowSpans = explode(":", $RowSpans); $CurrentRowColumnCount = $RowSpans[1]; } else { $CurrentRowColumnCount = 0; } if ($CurrentRowColumnCount > 0) { $this -> CurrentRow = array_fill(0, $CurrentRowColumnCount, ""); } $this -> RowOpen = true; break; } } } // Reading the necessary row, if found if ($this -> RowOpen) { // These two are needed to control for empty cells $MaxIndex = 0; $CellCount = 0; $CellHasSharedString = false; while ($this -> Valid = $this -> Worksheet -> read()) { switch ($this -> Worksheet -> name) { // End of row case "row": if ($this -> Worksheet -> nodeType == XMLReader::END_ELEMENT) { $this -> RowOpen = false; break 2; } break; // Cell case "c": // If it is a closing tag, skip it if ($this -> Worksheet -> nodeType == XMLReader::END_ELEMENT) { continue; } $StyleId = (int)$this -> Worksheet -> getAttribute("s"); // Get the index of the cell $Index = $this -> Worksheet -> getAttribute("r"); $Letter = preg_replace("{[^[:alpha:]]}S", "", $Index); $Index = self::IndexFromColumnLetter($Letter); // Determine cell type if ($this -> Worksheet -> getAttribute("t") == self::CELL_TYPE_SHARED_STR) { $CellHasSharedString = true; } else { $CellHasSharedString = false; } $this -> CurrentRow[$Index] = ""; $CellCount++; if ($Index > $MaxIndex) { $MaxIndex = $Index; } break; // Cell value case "v": case "is": if ($this -> Worksheet -> nodeType == XMLReader::END_ELEMENT) { continue; } $Value = $this -> Worksheet -> readString(); if ($CellHasSharedString) { $Value = $this -> GetSharedString($Value); } // Format value if necessary if ($Value !== "" && $StyleId && isset($this -> Styles[$StyleId])) { $Value = $this -> FormatValue($Value, $StyleId); } elseif ($Value) { $Value = $this -> GeneralFormat($Value); } $this -> CurrentRow[$Index] = $Value; break; } } // Adding empty cells, if necessary // Only empty cells inbetween and on the left side are added if ($MaxIndex + 1 > $CellCount) { $this -> CurrentRow = $this -> CurrentRow + array_fill(0, $MaxIndex + 1, ""); ksort($this -> CurrentRow); } } return $this -> CurrentRow; } /** * Return the identifying key of the current element. * Similar to the key() function for arrays in PHP * * @return mixed either an integer or a string */ public function key() { return $this -> Index; } /** * Check if there is a current element after calls to rewind() or next(). * Used to check if we"ve iterated to the end of the collection * * @return boolean FALSE if there"s nothing more to iterate over */ public function valid() { return $this -> Valid; } // !Countable interface method /** * Ostensibly should return the count of the contained items but this just returns the number * of rows read so far. It"s not really correct but at least coherent. */ public function count() { return $this -> Index + 1; } /** * Takes the column letter and converts it to a numerical index (0-based) * * @param string Letter(s) to convert * * @return mixed Numeric index (0-based) or boolean false if it cannot be calculated */ public static function IndexFromColumnLetter($Letter) { $Powers = array(); $Letter = strtoupper($Letter); $Result = 0; for ($i = strlen($Letter) - 1, $j = 0; $i >= 0; $i--, $j++) { $Ord = ord($Letter[$i]) - 64; if ($Ord > 26) { // Something is very, very wrong return false; } $Result += $Ord * pow(26, $j); } return $Result - 1; } /** * Helper function for greatest common divisor calculation in case GMP extension is * not enabled * * @param int Number #1 * @param int Number #2 * * @param int Greatest common divisor */ public static function GCD($A, $B) { $A = abs($A); $B = abs($B); if ($A + $B == 0) { return 0; } else { $C = 1; while ($A > 0) { $C = $A; $A = $B % $A; $B = $C; } return $C; } }
}
?>
example Call :
require("SpreadsheetReader.php");
$Reader = new SpreadsheetReader($inputfile);
$totalCount = 0;
$test=0;
foreach ($Reader as $Row)
{
$count_rows = $Row;
foreach($count_rows as $countRow){
if($countRow != ""){
$taa = explode(" ",$countRow);
$tCount = count($taa);
$result[]=$countRow." ";
}
}
}
return implode("",$result);
FULL REFERENCE
Top comments (0)