DEV Community

Teddy Zugana
Teddy Zugana

Posted on • Edited on

PHP excel Extract All Content Data to Txt

build : SpreadsheetReader.php

     <?php
     /**
     * Main class for spreadsheet reading
     *
     * @version 0.5.10
     * @author Martins Pilsetnieks
     */
class SpreadsheetReader implements SeekableIterator, Countable
{
    const TYPE_XLSX = "XLSX";
    const TYPE_XLS = "XLS";
    const TYPE_CSV = "CSV";
    const TYPE_ODS = "ODS";

    private $Options = array(
        "Delimiter" => "",
        "Enclosure" => """
    );

    /**
     * @var int Current row in the file
     */
    private $Index = 0;

    /**
     * @var SpreadsheetReader_* Handle for the reader object
     */
    private $Handle = array();

    /**
     * @var TYPE_* Type of the contained spreadsheet
     */
    private $Type = false;

    /**
     * @param string Path to file
     * @param string Original filename (in case of an uploaded file), used to determine file type, optional
     * @param string MIME type from an upload, used to determine file type, optional
     */
    public function __construct($Filepath, $OriginalFilename = false, $MimeType = false)
    {
        if (!is_readable($Filepath))
        {
            throw new Exception("SpreadsheetReader: File (".$Filepath.") not readable");
        }

        // To avoid timezone warnings and exceptions for formatting dates retrieved from files
        $DefaultTZ = @date_default_timezone_get();
        if ($DefaultTZ)
        {
            date_default_timezone_set($DefaultTZ);
        }

        // Checking the other parameters for correctness

        // This should be a check for string but we"re lenient
        if (!empty($OriginalFilename) && !is_scalar($OriginalFilename))
        {
            throw new Exception("SpreadsheetReader: Original file (2nd parameter) path is not a string or a scalar value.");
        }
        if (!empty($MimeType) && !is_scalar($MimeType))
        {
            throw new Exception("SpreadsheetReader: Mime type (3nd parameter) path is not a string or a scalar value.");
        }

        // 1. Determine type
        if (!$OriginalFilename)
        {
            $OriginalFilename = $Filepath;
        }

        $Extension = strtolower(pathinfo($OriginalFilename, PATHINFO_EXTENSION));

        switch ($MimeType)
        {
            case "text/csv":
            case "text/comma-separated-values":
            case "text/plain":
                $this -> Type = self::TYPE_CSV;
                break;
            case "application/vnd.ms-excel":
            case "application/msexcel":
            case "application/x-msexcel":
            case "application/x-ms-excel":
            case "application/vnd.ms-excel":
            case "application/x-excel":
            case "application/x-dos_ms_excel":
            case "application/xls":
            case "application/xlt":
            case "application/x-xls":
                // Excel does weird stuff
                if (in_array($Extension, array("csv", "tsv", "txt")))
                {
                    $this -> Type = self::TYPE_CSV;
                }
                else
                {
                    $this -> Type = self::TYPE_XLS;
                }
                break;
            case "application/vnd.oasis.opendocument.spreadsheet":
            case "application/vnd.oasis.opendocument.spreadsheet-template":
                $this -> Type = self::TYPE_ODS;
                break;
            case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
            case "application/vnd.openxmlformats-officedocument.spreadsheetml.template":
            case "application/xlsx":
            case "application/xltx":
                $this -> Type = self::TYPE_XLSX;
                break;
            case "application/xml":
                // Excel 2004 xml format uses this
                break;
        }

        if (!$this -> Type)
        {
            switch ($Extension)
            {
                case "xlsx":
                case "xltx": // XLSX template
                case "xlsm": // Macro-enabled XLSX
                case "xltm": // Macro-enabled XLSX template
                    $this -> Type = self::TYPE_XLSX;
                    break;
                case "xls":
                case "xlt":
                    $this -> Type = self::TYPE_XLS;
                    break;
                case "ods":
                case "odt":
                    $this -> Type = self::TYPE_ODS;
                    break;
                default:
                    $this -> Type = self::TYPE_CSV;
                    break;
            }
        }

        // Pre-checking XLS files, in case they are renamed CSV or XLSX files
        if ($this -> Type == self::TYPE_XLS)
        {
            self::Load(self::TYPE_XLS);
            $this -> Handle = new SpreadsheetReader_XLS($Filepath);
            if ($this -> Handle -> Error)
            {
                $this -> Handle -> __destruct();

                if (is_resource($ZipHandle = zip_open($Filepath)))
                {
                    $this -> Type = self::TYPE_XLSX;
                    zip_close($ZipHandle);
                }
                else
                {
                    $this -> Type = self::TYPE_CSV;
                }
            }
        }

        // 2. Create handle
        switch ($this -> Type)
        {
            case self::TYPE_XLSX:
                self::Load(self::TYPE_XLSX);
                $this -> Handle = new SpreadsheetReader_XLSX($Filepath);
                break;
            case self::TYPE_CSV:
                self::Load(self::TYPE_CSV);
                $this -> Handle = new SpreadsheetReader_CSV($Filepath, $this -> Options);
                break;
            case self::TYPE_XLS:
                // Everything already happens above
                break;
            case self::TYPE_ODS:
                self::Load(self::TYPE_ODS);
                $this -> Handle = new SpreadsheetReader_ODS($Filepath, $this -> Options);
                break;
        }
    }

    /**
     * Gets information about separate sheets in the given file
     *
     * @return array Associative array where key is sheet index and value is sheet name
     */
    public function Sheets()
    {
        return $this -> Handle -> Sheets();
    }

    /**
     * Changes the current sheet to another from the file.
     *  Note that changing the sheet will rewind the file to the beginning, even if
     *  the current sheet index is provided.
     *
     * @param int Sheet index
     *
     * @return bool True if sheet could be changed to the specified one,
     *  false if not (for example, if incorrect index was provided.
     */
    public function ChangeSheet($Index)
    {
        return $this -> Handle -> ChangeSheet($Index);
    }

    /**
     * Autoloads the required class for the particular spreadsheet type
     *
     * @param TYPE_* Spreadsheet type, one of TYPE_* constants of this class
     */
    private static function Load($Type)
    {
        if (!in_array($Type, array(self::TYPE_XLSX, self::TYPE_XLS, self::TYPE_CSV, self::TYPE_ODS)))
        {
            throw new Exception("SpreadsheetReader: Invalid type (".$Type.")");
        }

        // 2nd parameter is to prevent autoloading for the class.
        // If autoload works, the require line is unnecessary, if it doesn"t, it ends badly.
        if (!class_exists("SpreadsheetReader_".$Type, false))
        {
            require(dirname(__FILE__).DIRECTORY_SEPARATOR."SpreadsheetReader_".$Type.".php");
        }
    }

    // !Iterator interface methods

    /** 
     * Rewind the Iterator to the first element.
     * Similar to the reset() function for arrays in PHP
     */ 
    public function rewind()
    {
        $this -> Index = 0;
        if ($this -> Handle)
        {
            $this -> Handle -> rewind();
        }
    }

    /** 
     * Return the current element.
     * Similar to the current() function for arrays in PHP
     *
     * @return mixed current element from the collection
     */
    public function current()
    {
        if ($this -> Handle)
        {
            return $this -> Handle -> current();
        }
        return null;
    }

    /** 
     * Move forward to next element. 
     * Similar to the next() function for arrays in PHP 
     */ 
    public function next()
    {
        if ($this -> Handle)
        {
            $this -> Index++;

            return $this -> Handle -> next();
        }
        return null;
    }

    /** 
     * Return the identifying key of the current element.
     * Similar to the key() function for arrays in PHP
     *
     * @return mixed either an integer or a string
     */ 
    public function key()
    {
        if ($this -> Handle)
        {
            return $this -> Handle -> key();
        }
        return null;
    }

    /** 
     * Check if there is a current element after calls to rewind() or next().
     * Used to check if we"ve iterated to the end of the collection
     *
     * @return boolean FALSE if there"s nothing more to iterate over
     */ 
    public function valid()
    {
        if ($this -> Handle)
        {
            return $this -> Handle -> valid();
        }
        return false;
    }

    // !Countable interface method
    public function count()
    {
        if ($this -> Handle)
        {
            return $this -> Handle -> count();
        }
        return 0;
    }

    /**
     * Method for SeekableIterator interface. Takes a posiiton and traverses the file to that position
     * The value can be retrieved with a `current()` call afterwards.
     *
     * @param int Position in file
     */
    public function seek($Position)
    {
        if (!$this -> Handle)
        {
            throw new OutOfBoundsException("SpreadsheetReader: No file opened");
        }

        $CurrentIndex = $this -> Handle -> key();

        if ($CurrentIndex != $Position)
        {
            if ($Position < $CurrentIndex || is_null($CurrentIndex) || $Position == 0)
            {
                $this -> rewind();
            }

            while ($this -> Handle -> valid() && ($Position > $this -> Handle -> key()))
            {
                $this -> Handle -> next();
            }

            if (!$this -> Handle -> valid())
            {
                throw new OutOfBoundsException("SpreadsheetError: Position ".$Position." not found");
            }
        }

        return null;
    }
}
Enter fullscreen mode Exit fullscreen mode

?>

THEN BUILD LIBRARY : SpreadsheetReader_XLS.php

<?php
/**

  • Class for parsing XLS files *
  • @author Martins Pilsetnieks
    /
    class SpreadsheetReader_XLS implements Iterator, Countable
    {
    /
    *
    * @var array Options array, pre-populated with the default values.
    */
    private $Options = array(
    );

    /**
     * @var resource File handle
     */
    private $Handle = false;
    
    private $Index = 0;
    
    private $Error = false;
    
    /**
     * @var array Sheet information
     */
    private $Sheets = false;
    private $SheetIndexes = array();
    
    /**
     * @var int Current sheet index
     */
    private $CurrentSheet = 0;
    
    /**
     * @var array Content of the current row
     */
    private $CurrentRow = array();
    
    /**
     * @var int Column count in the sheet
     */
    private $ColumnCount = 0;
    /**
     * @var int Row count in the sheet
     */
    private $RowCount = 0;
    
    /**
     * @var array Template to use for empty rows. Retrieved rows are merged
     *  with this so that empty cells are added, too
     */
    private $EmptyRow = array();
    
    /**
     * @param string Path to file
     * @param array Options
     */
    public function __construct($Filepath, array $Options = null)
    {
        if (!is_readable($Filepath))
        {
            throw new Exception("SpreadsheetReader_XLS: File not readable (".$Filepath.")");
        }
    
        if (!class_exists("Spreadsheet_Excel_Reader"))
        {
            throw new Exception("SpreadsheetReader_XLS: Spreadsheet_Excel_Reader class not available");
        }
    
        $this -> Handle = new Spreadsheet_Excel_Reader($Filepath, false, "UTF-8");
    
        if (function_exists("mb_convert_encoding"))
        {
            $this -> Handle -> setUTFEncoder("mb");
        }
    
        if (empty($this -> Handle -> sheets))
        {
            $this -> Error = true;
            return null;
        }
    
        $this -> ChangeSheet(0);
    }
    
    public function __destruct()
    {
        unset($this -> Handle);
    }
    
    /**
     * Retrieves an array with information about sheets in the current file
     *
     * @return array List of sheets (key is sheet index, value is name)
     */
    public function Sheets()
    {
        if ($this -> Sheets === false)
        {
            $this -> Sheets = array();
            $this -> SheetIndexes = array_keys($this -> Handle -> sheets);
    
            foreach ($this -> SheetIndexes as $SheetIndex)
            {
                $this -> Sheets[] = $this -> Handle -> boundsheets[$SheetIndex]["name"];
            }
        }
        return $this -> Sheets;
    }
    
    /**
     * Changes the current sheet in the file to another
     *
     * @param int Sheet index
     *
     * @return bool True if sheet was successfully changed, false otherwise.
     */
    public function ChangeSheet($Index)
    {
        $Index = (int)$Index;
        $Sheets = $this -> Sheets();
    
        if (isset($this -> Sheets[$Index]))
        {
            $this -> rewind();
            $this -> CurrentSheet = $this -> SheetIndexes[$Index];
    
            $this -> ColumnCount = $this -> Handle -> sheets[$this -> CurrentSheet]["numCols"];
            $this -> RowCount = $this -> Handle -> sheets[$this -> CurrentSheet]["numRows"];
    
            // For the case when Spreadsheet_Excel_Reader doesn"t have the row count set correctly.
            if (!$this -> RowCount && count($this -> Handle -> sheets[$this -> CurrentSheet]["cells"]))
            {
                end($this -> Handle -> sheets[$this -> CurrentSheet]["cells"]);
                $this -> RowCount = (int)key($this -> Handle -> sheets[$this -> CurrentSheet]["cells"]);
            }
    
            if ($this -> ColumnCount)
            {
                $this -> EmptyRow = array_fill(1, $this -> ColumnCount, "");
            }
            else
            {
                $this -> EmptyRow = array();
            }
        }
    
        return false;
    }
    
    public function __get($Name)
    {
        switch ($Name)
        {
            case "Error":
                return $this -> Error;
                break;
        }
        return null;
    }
    
    // !Iterator interface methods
    /** 
     * Rewind the Iterator to the first element.
     * Similar to the reset() function for arrays in PHP
     */ 
    public function rewind()
    {
        $this -> Index = 0;
    }
    
    /**
     * Return the current element.
     * Similar to the current() function for arrays in PHP
     *
     * @return mixed current element from the collection
     */
    public function current()
    {
        if ($this -> Index == 0)
        {
            $this -> next();
        }
    
        return $this -> CurrentRow;
    }
    
    /** 
     * Move forward to next element. 
     * Similar to the next() function for arrays in PHP 
     */ 
    public function next()
    {
        // Internal counter is advanced here instead of the if statement
        //  because apparently it"s fully possible that an empty row will not be
        //  present at all
        $this -> Index++;
    
        if ($this -> Error)
        {
            return array();
        }
        elseif (isset($this -> Handle -> sheets[$this -> CurrentSheet]["cells"][$this -> Index]))
        {
            $this -> CurrentRow = $this -> Handle -> sheets[$this -> CurrentSheet]["cells"][$this -> Index];
            if (!$this -> CurrentRow)
            {
                return array();
            }
    
            $this -> CurrentRow = $this -> CurrentRow + $this -> EmptyRow;
            ksort($this -> CurrentRow);
    
            $this -> CurrentRow = array_values($this -> CurrentRow);
            return $this -> CurrentRow;
        }
        else
        {
            $this -> CurrentRow = $this -> EmptyRow;
            return $this -> CurrentRow;
        }
    }
    
    /** 
     * Return the identifying key of the current element.
     * Similar to the key() function for arrays in PHP
     *
     * @return mixed either an integer or a string
     */ 
    public function key()
    {
        return $this -> Index;
    }
    
    /** 
     * Check if there is a current element after calls to rewind() or next().
     * Used to check if we"ve iterated to the end of the collection
     *
     * @return boolean FALSE if there"s nothing more to iterate over
     */ 
    public function valid()
    {
        if ($this -> Error)
        {
            return false;
        }
        return ($this -> Index <= $this -> RowCount);
    }
    
    // !Countable interface method
    /**
     * Ostensibly should return the count of the contained items but this just returns the number
     * of rows read so far. It"s not really correct but at least coherent.
     */
    public function count()
    {
        if ($this -> Error)
        {
            return 0;
        }
    
        return $this -> RowCount;
    }
    

    }
    ?>

THEN BUILD XLSX LIBRARY : SpreadsheetReader_XLSX.php

<?php
/**

  • Class for parsing XLSX files specifically *
  • @author Martins Pilsetnieks
    */
    class SpreadsheetReader_XLSX implements Iterator, Countable
    {
    const CELL_TYPE_BOOL = "b";
    const CELL_TYPE_NUMBER = "n";
    const CELL_TYPE_ERROR = "e";
    const CELL_TYPE_SHARED_STR = "s";
    const CELL_TYPE_STR = "str";
    const CELL_TYPE_INLINE_STR = "inlineStr";

    /**
     * Number of shared strings that can be reasonably cached, i.e., that aren"t read from file but stored in memory.
     *  If the total number of shared strings is higher than this, caching is not used.
     *  If this value is null, shared strings are cached regardless of amount.
     *  With large shared string caches there are huge performance gains, however a lot of memory could be used which
     *  can be a problem, especially on shared hosting.
     */
    const SHARED_STRING_CACHE_LIMIT = 50000;
    
    private $Options = array(
        "TempDir" => "",
        "ReturnDateTimeObjects" => false
    );
    
    private static $RuntimeInfo = array(
        "GMPSupported" => false
    );
    
    private $Valid = false;
    
    /**
     * @var SpreadsheetReader_* Handle for the reader object
     */
    private $Handle = false;
    
    // Worksheet file
    /**
     * @var string Path to the worksheet XML file
     */
    private $WorksheetPath = false;
    /**
     * @var XMLReader XML reader object for the worksheet XML file
     */
    private $Worksheet = false;
    
    // Shared strings file
    /**
     * @var string Path to shared strings XML file
     */
    private $SharedStringsPath = false;
    /**
     * @var XMLReader XML reader object for the shared strings XML file
     */
    private $SharedStrings = false;
    /**
     * @var array Shared strings cache, if the number of shared strings is low enough
     */
    private $SharedStringCache = array();
    
    // Workbook data
    /**
     * @var SimpleXMLElement XML object for the workbook XML file
     */
    private $WorkbookXML = false;
    
    // Style data
    /**
     * @var SimpleXMLElement XML object for the styles XML file
     */
    private $StylesXML = false;
    /**
     * @var array Container for cell value style data
     */
    private $Styles = array();
    
    private $TempDir = "";
    private $TempFiles = array();
    
    private $CurrentRow = false;
    
    // Runtime parsing data
    /**
     * @var int Current row in the file
     */
    private $Index = 0;
    
    /**
     * @var array Data about separate sheets in the file
     */
    private $Sheets = false;
    
    private $SharedStringCount = 0;
    private $SharedStringIndex = 0;
    private $LastSharedStringValue = null;
    
    private $RowOpen = false;
    
    private $SSOpen = false;
    private $SSForwarded = false;
    
    private static $BuiltinFormats = array(
        0 => "",
        1 => "0",
        2 => "0.00",
        3 => "#,##0",
        4 => "#,##0.00",
    
        9 => "0%",
        10 => "0.00%",
        11 => "0.00E+00",
        12 => "# ?/?",
        13 => "# ??/??",
        14 => "mm-dd-yy",
        15 => "d-mmm-yy",
        16 => "d-mmm",
        17 => "mmm-yy",
        18 => "h:mm AM/PM",
        19 => "h:mm:ss AM/PM",
        20 => "h:mm",
        21 => "h:mm:ss",
        22 => "m/d/yy h:mm",
    
        37 => "#,##0 ;(#,##0)",
        38 => "#,##0 ;[Red](#,##0)",
        39 => "#,##0.00;(#,##0.00)",
        40 => "#,##0.00;[Red](#,##0.00)",
    
        45 => "mm:ss",
        46 => "[h]:mm:ss",
        47 => "mmss.0",
        48 => "##0.0E+0",
        49 => "@",
    
        // CHT & CHS
        27 => "[$-404]e/m/d",
        30 => "m/d/yy",
        36 => "[$-404]e/m/d",
        50 => "[$-404]e/m/d",
        57 => "[$-404]e/m/d",
    
        // THA
        59 => "t0",
        60 => "t0.00",
        61 =>"t#,##0",
        62 => "t#,##0.00",
        67 => "t0%",
        68 => "t0.00%",
        69 => "t# ?/?",
        70 => "t# ??/??"
    );
    private $Formats = array();
    
    private static $DateReplacements = array(
        "All" => array(
            "\\" => "",
            "am/pm" => "A",
            "yyyy" => "Y",
            "yy" => "y",
            "mmmmm" => "M",
            "mmmm" => "F",
            "mmm" => "M",
            ":mm" => ":i",
            "mm" => "m",
            "m" => "n",
            "dddd" => "l",
            "ddd" => "D",
            "dd" => "d",
            "d" => "j",
            "ss" => "s",
            ".s" => ""
        ),
        "24H" => array(
            "hh" => "H",
            "h" => "G"
        ),
        "12H" => array(
            "hh" => "h",
            "h" => "G"
        )
    );
    
    private static $BaseDate = false;
    private static $DecimalSeparator = ".";
    private static $ThousandSeparator = "";
    private static $CurrencyCode = "";
    
    /**
     * @var array Cache for already processed format strings
     */
    private $ParsedFormatCache = array();
    
    /**
     * @param string Path to file
     * @param array Options:
     *  TempDir => string Temporary directory path
     *  ReturnDateTimeObjects => bool True => dates and times will be returned as PHP DateTime objects, false => as strings
     */
    public function __construct($Filepath, array $Options = null)
    {
        if (!is_readable($Filepath))
        {
            throw new Exception("SpreadsheetReader_XLSX: File not readable (".$Filepath.")");
        }
    
        $this -> TempDir = isset($Options["TempDir"]) && is_writable($Options["TempDir"]) ?
            $Options["TempDir"] :
            sys_get_temp_dir();
    
        $this -> TempDir = rtrim($this -> TempDir, DIRECTORY_SEPARATOR);
        $this -> TempDir = $this -> TempDir.DIRECTORY_SEPARATOR.uniqid().DIRECTORY_SEPARATOR;
    
        $Zip = new ZipArchive;
        $Status = $Zip -> open($Filepath);
    
        if ($Status !== true)
        {
            throw new Exception("SpreadsheetReader_XLSX: File not readable (".$Filepath.") (Error ".$Status.")");
        }
    
        // Getting the general workbook information
        if ($Zip -> locateName("xl/workbook.xml") !== false)
        {
            $this -> WorkbookXML = new SimpleXMLElement($Zip -> getFromName("xl/workbook.xml"));
        }
    
        // Extracting the XMLs from the XLSX zip file
        if ($Zip -> locateName("xl/sharedStrings.xml") !== false)
        {
            $this -> SharedStringsPath = $this -> TempDir."xl".DIRECTORY_SEPARATOR."sharedStrings.xml";
            $Zip -> extractTo($this -> TempDir, "xl/sharedStrings.xml");
            $this -> TempFiles[] = $this -> TempDir."xl".DIRECTORY_SEPARATOR."sharedStrings.xml";
    
            if (is_readable($this -> SharedStringsPath))
            {
                $this -> SharedStrings = new XMLReader;
                $this -> SharedStrings -> open($this -> SharedStringsPath);
                $this -> PrepareSharedStringCache();
            }
        }
    
        $Sheets = $this -> Sheets();
    
        foreach ($this -> Sheets as $Index => $Name)
        {
            if ($Zip -> locateName("xl/worksheets/sheet".$Index.".xml") !== false)
            {
                $Zip -> extractTo($this -> TempDir, "xl/worksheets/sheet".$Index.".xml");
                $this -> TempFiles[] = $this -> TempDir."xl".DIRECTORY_SEPARATOR."worksheets".DIRECTORY_SEPARATOR."sheet".$Index.".xml";
            }
        }
    
        $this -> ChangeSheet(0);
    
        // If worksheet is present and is OK, parse the styles already
        if ($Zip -> locateName("xl/styles.xml") !== false)
        {
            $this -> StylesXML = new SimpleXMLElement($Zip -> getFromName("xl/styles.xml"));
            if ($this -> StylesXML && $this -> StylesXML -> cellXfs && $this -> StylesXML -> cellXfs -> xf)
            {
                foreach ($this -> StylesXML -> cellXfs -> xf as $Index => $XF)
                {
                    // Format #0 is a special case - it is the "General" format that is applied regardless of applyNumberFormat
                    if ($XF -> attributes() -> applyNumberFormat || (0 == (int)$XF -> attributes() -> numFmtId))
                    {
                        $FormatId = (int)$XF -> attributes() -> numFmtId;
                        // If format ID >= 164, it is a custom format and should be read from styleSheet\numFmts
                        $this -> Styles[] = $FormatId;
                    }
                    else
                    {
                        // 0 for "General" format
                        $this -> Styles[] = 0;
                    }
                }
            }
    
            if ($this -> StylesXML -> numFmts && $this -> StylesXML -> numFmts -> numFmt)
            {
                foreach ($this -> StylesXML -> numFmts -> numFmt as $Index => $NumFmt)
                {
                    $this -> Formats[(int)$NumFmt -> attributes() -> numFmtId] = (string)$NumFmt -> attributes() -> formatCode;
                }
            }
    
            unset($this -> StylesXML);
        }
    
        $Zip -> close();
    
        // Setting base date
        if (!self::$BaseDate)
        {
            self::$BaseDate = new DateTime;
            self::$BaseDate -> setTimezone(new DateTimeZone("UTC"));
            self::$BaseDate -> setDate(1900, 1, 0);
            self::$BaseDate -> setTime(0, 0, 0);
        }
    
        // Decimal and thousand separators
        if (!self::$DecimalSeparator && !self::$ThousandSeparator && !self::$CurrencyCode)
        {
            $Locale = localeconv();
            self::$DecimalSeparator = $Locale["decimal_point"];
            self::$ThousandSeparator = $Locale["thousands_sep"];
            self::$CurrencyCode = $Locale["int_curr_symbol"];
        }
    
        if (function_exists("gmp_gcd"))
        {
            self::$RuntimeInfo["GMPSupported"] = true;
        }
    }
    
    /**
     * Destructor, destroys all that remains (closes and deletes temp files)
     */
    public function __destruct()
    {
        foreach ($this -> TempFiles as $TempFile)
        {
            @unlink($TempFile);
        }
    
        // Better safe than sorry - shouldn"t try deleting "." or "/", or "..".
        if (strlen($this -> TempDir) > 2)
        {
            @rmdir($this -> TempDir."xl".DIRECTORY_SEPARATOR."worksheets");
            @rmdir($this -> TempDir."xl");
            @rmdir($this -> TempDir);
        }
    
        if ($this -> Worksheet && $this -> Worksheet instanceof XMLReader)
        {
            $this -> Worksheet -> close();
            unset($this -> Worksheet);
        }
        unset($this -> WorksheetPath);
    
        if ($this -> SharedStrings && $this -> SharedStrings instanceof XMLReader)
        {
            $this -> SharedStrings -> close();
            unset($this -> SharedStrings);
        }
        unset($this -> SharedStringsPath);
    
        if (isset($this -> StylesXML))
        {
            unset($this -> StylesXML);
        }
        if ($this -> WorkbookXML)
        {
            unset($this -> WorkbookXML);
        }
    }
    
    /**
     * Retrieves an array with information about sheets in the current file
     *
     * @return array List of sheets (key is sheet index, value is name)
     */
    public function Sheets()
    {
        if ($this -> Sheets === false)
        {
            $this -> Sheets = array();
            foreach ($this -> WorkbookXML -> sheets -> sheet as $Index => $Sheet)
            {
                $Attributes = $Sheet -> attributes("r", true);
                foreach ($Attributes as $Name => $Value)
                {
                    if ($Name == "id")
                    {
                        $SheetID = (int)str_replace("rId", "", (string)$Value);
                        break;
                    }
                }
    
                $this -> Sheets[$SheetID] = (string)$Sheet["name"];
            }
            ksort($this -> Sheets);
        }
        return array_values($this -> Sheets);
    }
    
    /**
     * Changes the current sheet in the file to another
     *
     * @param int Sheet index
     *
     * @return bool True if sheet was successfully changed, false otherwise.
     */
    public function ChangeSheet($Index)
    {
        $RealSheetIndex = false;
        $Sheets = $this -> Sheets();
        if (isset($Sheets[$Index]))
        {
            $SheetIndexes = array_keys($this -> Sheets);
            $RealSheetIndex = $SheetIndexes[$Index];
        }
    
        $TempWorksheetPath = $this -> TempDir."xl/worksheets/sheet".$RealSheetIndex.".xml";
    
        if ($RealSheetIndex !== false && is_readable($TempWorksheetPath))
        {
            $this -> WorksheetPath = $TempWorksheetPath;
    
            $this -> rewind();
            return true;
        }
    
        return false;
    }
    
    /**
     * Creating shared string cache if the number of shared strings is acceptably low (or there is no limit on the amount
     */
    private function PrepareSharedStringCache()
    {
        while ($this -> SharedStrings -> read())
        {
            if ($this -> SharedStrings -> name == "sst")
            {
                $this -> SharedStringCount = $this -> SharedStrings -> getAttribute("count");
                break;
            }
        }
    
        if (!$this -> SharedStringCount || (self::SHARED_STRING_CACHE_LIMIT < $this -> SharedStringCount && self::SHARED_STRING_CACHE_LIMIT !== null))
        {
            return false;
        }
    
        $CacheIndex = 0;
        $CacheValue = "";
        while ($this -> SharedStrings -> read())
        {
            switch ($this -> SharedStrings -> name)
            {
                case "si":
                    if ($this -> SharedStrings -> nodeType == XMLReader::END_ELEMENT)
                    {
                        $this -> SharedStringCache[$CacheIndex] = $CacheValue;
                        $CacheIndex++;
                        $CacheValue = "";
                    }
                    break;
                case "t":
                    if ($this -> SharedStrings -> nodeType == XMLReader::END_ELEMENT)
                    {
                        continue;
                    }
                    $CacheValue .= $this -> SharedStrings -> readString();
                    break;
            }
        }
    
        $this -> SharedStrings -> close();
        return true;
    }
    
    /**
     * Retrieves a shared string value by its index
     *
     * @param int Shared string index
     *
     * @return string Value
     */
    private function GetSharedString($Index)
    {
        if ((self::SHARED_STRING_CACHE_LIMIT === null || self::SHARED_STRING_CACHE_LIMIT > 0) && !empty($this -> SharedStringCache))
        {
            if (isset($this -> SharedStringCache[$Index]))
            {
                return $this -> SharedStringCache[$Index];
            }
            else
            {
                return "";
            }
        }
    
        // If the desired index is before the current, rewind the XML
        if ($this -> SharedStringIndex > $Index)
        {
            $this -> SSOpen = false;
            $this -> SharedStrings -> close();
            $this -> SharedStrings -> open($this -> SharedStringsPath);
            $this -> SharedStringIndex = 0;
            $this -> LastSharedStringValue = null;
            $this -> SSForwarded = false;
        }
    
        // Finding the unique string count (if not already read)
        if ($this -> SharedStringIndex == 0 && !$this -> SharedStringCount)
        {
            while ($this -> SharedStrings -> read())
            {
                if ($this -> SharedStrings -> name == "sst")
                {
                    $this -> SharedStringCount = $this -> SharedStrings -> getAttribute("uniqueCount");
                    break;
                }
            }
        }
    
        // If index of the desired string is larger than possible, don"t even bother.
        if ($this -> SharedStringCount && ($Index >= $this -> SharedStringCount))
        {
            return "";
        }
    
        // If an index with the same value as the last already fetched is requested
        // (any further traversing the tree would get us further away from the node)
        if (($Index == $this -> SharedStringIndex) && ($this -> LastSharedStringValue !== null))
        {
            return $this -> LastSharedStringValue;
        }
    
        // Find the correct <si> node with the desired index
        while ($this -> SharedStringIndex <= $Index)
        {
            // SSForwarded is set further to avoid double reading in case nodes are skipped.
            if ($this -> SSForwarded)
            {
                $this -> SSForwarded = false;
            }
            else
            {
                $ReadStatus = $this -> SharedStrings -> read();
                if (!$ReadStatus)
                {
                    break;
                }
            }
    
            if ($this -> SharedStrings -> name == "si")
            {
                if ($this -> SharedStrings -> nodeType == XMLReader::END_ELEMENT)
                {
                    $this -> SSOpen = false;
                    $this -> SharedStringIndex++;
                }
                else
                {
                    $this -> SSOpen = true;
    
                    if ($this -> SharedStringIndex < $Index)
                    {
                        $this -> SSOpen = false;
                        $this -> SharedStrings -> next("si");
                        $this -> SSForwarded = true;
                        $this -> SharedStringIndex++;
                        continue;
                    }
                    else
                    {
                        break;
                    }
                }
            }
        }
    
        $Value = "";
    
        // Extract the value from the shared string
        if ($this -> SSOpen && ($this -> SharedStringIndex == $Index))
        {
            while ($this -> SharedStrings -> read())
            {
                switch ($this -> SharedStrings -> name)
                {
                    case "t":
                        if ($this -> SharedStrings -> nodeType == XMLReader::END_ELEMENT)
                        {
                            continue;
                        }
                        $Value .= $this -> SharedStrings -> readString();
                        break;
                    case "si":
                        if ($this -> SharedStrings -> nodeType == XMLReader::END_ELEMENT)
                        {
                            $this -> SSOpen = false;
                            $this -> SSForwarded = true;
                            break 2;
                        }
                        break;
                }
            }
        }
    
        if ($Value)
        {
            $this -> LastSharedStringValue = $Value;
        }
        return $Value;
    }
    
    /**
     * Formats the value according to the index
     *
     * @param string Cell value
     * @param int Format index
     *
     * @return string Formatted cell value
     */
    private function FormatValue($Value, $Index)
    {
        if (!is_numeric($Value))
        {
            return $Value;
        }
    
        if (isset($this -> Styles[$Index]) && ($this -> Styles[$Index] !== false))
        {
            $Index = $this -> Styles[$Index];
        }
        else
        {
            return $Value;
        }
    
        // A special case for the "General" format
        if ($Index == 0)
        {
            return $this -> GeneralFormat($Value);
        }
    
        $Format = array();
    
        if (isset($this -> ParsedFormatCache[$Index]))
        {
            $Format = $this -> ParsedFormatCache[$Index];
        }
    
        if (!$Format)
        {
            $Format = array(
                "Code" => false,
                "Type" => false,
                "Scale" => 1,
                "Thousands" => false,
                "Currency" => false
            );
    
            if (isset(self::$BuiltinFormats[$Index]))
            {
                $Format["Code"] = self::$BuiltinFormats[$Index];
            }
            elseif (isset($this -> Formats[$Index]))
            {
                $Format["Code"] = $this -> Formats[$Index];
            }
    
            // Format code found, now parsing the format
            if ($Format["Code"])
            {
                $Sections = explode(";", $Format["Code"]);
                $Format["Code"] = $Sections[0];
    
                switch (count($Sections))
                {
                    case 2:
                        if ($Value < 0)
                        {
                            $Format["Code"] = $Sections[1];
                        }
                        break;
                    case 3:
                    case 4:
                        if ($Value < 0)
                        {
                            $Format["Code"] = $Sections[1];
                        }
                        elseif ($Value == 0)
                        {
                            $Format["Code"] = $Sections[2];
                        }
                        break;
                }
            }
    
            // Stripping colors
            $Format["Code"] = trim(preg_replace("{^\[[[:alpha:]]+\]}i", "", $Format["Code"]));
    
            // Percentages
            if (substr($Format["Code"], -1) == "%")
            {
                $Format["Type"] = "Percentage";
            }
            elseif (preg_match("{^(\[\$[[:alpha:]]*-[0-9A-F]*\])*[hmsdy]}i", $Format["Code"]))
            {
                $Format["Type"] = "DateTime";
    
                $Format["Code"] = trim(preg_replace("{^(\[\$[[:alpha:]]*-[0-9A-F]*\])}i", "", $Format["Code"]));
                $Format["Code"] = strtolower($Format["Code"]);
    
                $Format["Code"] = strtr($Format["Code"], self::$DateReplacements["All"]);
                if (strpos($Format["Code"], "A") === false)
                {
                    $Format["Code"] = strtr($Format["Code"], self::$DateReplacements["24H"]);
                }
                else
                {
                    $Format["Code"] = strtr($Format["Code"], self::$DateReplacements["12H"]);
                }
            }
            elseif ($Format["Code"] == "[$EUR ]#,##0.00_-")
            {
                $Format["Type"] = "Euro";
            }
            else
            {
                // Removing skipped characters
                $Format["Code"] = preg_replace("{_.}", "", $Format["Code"]);
                // Removing unnecessary escaping
                $Format["Code"] = preg_replace("{\\\\}", "", $Format["Code"]);
                // Removing string quotes
                $Format["Code"] = str_replace(array(""", "*"), "", $Format["Code"]);
                // Removing thousands separator
                if (strpos($Format["Code"], "0,0") !== false || strpos($Format["Code"], "#,#") !== false)
                {
                    $Format["Thousands"] = true;
                }
                $Format["Code"] = str_replace(array("0,0", "#,#"), array("00", "##"), $Format["Code"]);
    
                // Scaling (Commas indicate the power)
                $Scale = 1;
                $Matches = array();
                if (preg_match("{(0|#)(,+)}", $Format["Code"], $Matches))
                {
                    $Scale = pow(1000, strlen($Matches[2]));
                    // Removing the commas
                    $Format["Code"] = preg_replace(array("{0,+}", "{#,+}"), array("0", "#"), $Format["Code"]);
                }
    
                $Format["Scale"] = $Scale;
    
                if (preg_match("{#?.*\?\/\?}", $Format["Code"]))
                {
                    $Format["Type"] = "Fraction";
                }
                else
                {
                    $Format["Code"] = str_replace("#", "", $Format["Code"]);
    
                    $Matches = array();
                    if (preg_match("{(0+)(\.?)(0*)}", preg_replace("{\[[^\]]+\]}", "", $Format["Code"]), $Matches))
                    {
                        $Integer = $Matches[1];
                        $DecimalPoint = $Matches[2];
                        $Decimals = $Matches[3];
    
                        $Format["MinWidth"] = strlen($Integer) + strlen($DecimalPoint) + strlen($Decimals);
                        $Format["Decimals"] = $Decimals;
                        $Format["Precision"] = strlen($Format["Decimals"]);
                        $Format["Pattern"] = "%0".$Format["MinWidth"].".".$Format["Precision"]."f";
                    }
                }
    
                $Matches = array();
                if (preg_match("{\[\$(.*)\]}u", $Format["Code"], $Matches))
                {
                    $CurrFormat = $Matches[0];
                    $CurrCode = $Matches[1];
                    $CurrCode = explode("-", $CurrCode);
                    if ($CurrCode)
                    {
                        $CurrCode = $CurrCode[0];
                    }
    
                    if (!$CurrCode)
                    {
                        $CurrCode = self::$CurrencyCode;
                    }
    
                    $Format["Currency"] = $CurrCode;
                }
                $Format["Code"] = trim($Format["Code"]);
            }
    
            $this -> ParsedFormatCache[$Index] = $Format;
        }
    
        // Applying format to value
        if ($Format)
        {
            if ($Format["Code"] == "@")
            {
                return (string)$Value;
            }
            // Percentages
            elseif ($Format["Type"] == "Percentage")
            {
                if ($Format["Code"] === "0%")
                {
                    $Value = round(100 * $Value, 0)."%";
                }
                else
                {
                    $Value = sprintf("%.2f%%", round(100 * $Value, 2));
                }
            }
            // Dates and times
            elseif ($Format["Type"] == "DateTime")
            {
                $Days = (int)$Value;
                // Correcting for Feb 29, 1900
                if ($Days > 60)
                {
                    $Days--;
                }
    
                // At this point time is a fraction of a day
                $Time = ($Value - (int)$Value);
                $Seconds = 0;
                if ($Time)
                {
                    // Here time is converted to seconds
                    // Some loss of precision will occur
                    $Seconds = (int)($Time * 86400);
                }
    
                $Value = clone self::$BaseDate;
                $Value -> add(new DateInterval("P".$Days."D".($Seconds ? "T".$Seconds."S" : "")));
    
                if (!$this -> Options["ReturnDateTimeObjects"])
                {
                    $Value = $Value -> format($Format["Code"]);
                }
                else
                {
                    // A DateTime object is returned
                }
            }
            elseif ($Format["Type"] == "Euro")
            {
                $Value = "EUR ".sprintf("%1.2f", $Value);
            }
            else
            {
                // Fractional numbers
                if ($Format["Type"] == "Fraction" && ($Value != (int)$Value))
                {
                    $Integer = floor(abs($Value));
                    $Decimal = fmod(abs($Value), 1);
                    // Removing the integer part and decimal point
                    $Decimal *= pow(10, strlen($Decimal) - 2);
                    $DecimalDivisor = pow(10, strlen($Decimal));
    
                    if (self::$RuntimeInfo["GMPSupported"])
                    {
                        $GCD = gmp_strval(gmp_gcd($Decimal, $DecimalDivisor));
                    }
                    else
                    {
                        $GCD = self::GCD($Decimal, $DecimalDivisor);
                    }
    
                    $AdjDecimal = $DecimalPart/$GCD;
                    $AdjDecimalDivisor = $DecimalDivisor/$GCD;
    
                    if (
                        strpos($Format["Code"], "0") !== false || 
                        strpos($Format["Code"], "#") !== false ||
                        substr($Format["Code"], 0, 3) == "? ?"
                    )
                    {
                        // The integer part is shown separately apart from the fraction
                        $Value = ($Value < 0 ? "-" : "").
                            $Integer ? $Integer." " : "".
                            $AdjDecimal."/".
                            $AdjDecimalDivisor;
                    }
                    else
                    {
                        // The fraction includes the integer part
                        $AdjDecimal += $Integer * $AdjDecimalDivisor;
                        $Value = ($Value < 0 ? "-" : "").
                            $AdjDecimal."/".
                            $AdjDecimalDivisor;
                    }
                }
                else
                {
                    // Scaling
                    $Value = $Value / $Format["Scale"];
    
                    if (!empty($Format["MinWidth"]) && $Format["Decimals"])
                    {
                        if ($Format["Thousands"])
                        {
                            $Value = number_format($Value, $Format["Precision"],
                                self::$DecimalSeparator, self::$ThousandSeparator);
                        }
                        else
                        {
                            $Value = sprintf($Format["Pattern"], $Value);
                        }
    
                        $Value = preg_replace("{(0+)(\.?)(0*)}", $Value, $Format["Code"]);
                    }
                }
    
                // Currency/Accounting
                if ($Format["Currency"])
                {
                    $Value = preg_replace("", $Format["Currency"], $Value);
                }
            }
    
        }
    
        return $Value;
    }
    
    /**
     * Attempts to approximate Excel"s "general" format.
     *
     * @param mixed Value
     *
     * @return mixed Result
     */
    public function GeneralFormat($Value)
    {
        // Numeric format
        if (is_numeric($Value))
        {
            $Value = (float)$Value;
        }
        return $Value;
    }
    
    // !Iterator interface methods
    /** 
     * Rewind the Iterator to the first element.
     * Similar to the reset() function for arrays in PHP
     */ 
    public function rewind()
    {
        // Removed the check whether $this -> Index == 0 otherwise ChangeSheet doesn"t work properly
    
        // If the worksheet was already iterated, XML file is reopened.
        // Otherwise it should be at the beginning anyway
        if ($this -> Worksheet instanceof XMLReader)
        {
            $this -> Worksheet -> close();
        }
        else
        {
            $this -> Worksheet = new XMLReader;
        }
    
        $this -> Worksheet -> open($this -> WorksheetPath);
    
        $this -> Valid = true;
        $this -> RowOpen = false;
        $this -> CurrentRow = false;
        $this -> Index = 0;
    }
    
    /**
     * Return the current element.
     * Similar to the current() function for arrays in PHP
     *
     * @return mixed current element from the collection
     */
    public function current()
    {
        if ($this -> Index == 0 && $this -> CurrentRow === false)
        {
            $this -> next();
            $this -> Index--;
        }
        return $this -> CurrentRow;
    }
    
    /** 
     * Move forward to next element. 
     * Similar to the next() function for arrays in PHP 
     */ 
    public function next()
    {
        $this -> Index++;
    
        $this -> CurrentRow = array();
    
        if (!$this -> RowOpen)
        {
            while ($this -> Valid = $this -> Worksheet -> read())
            {
                if ($this -> Worksheet -> name == "row")
                {
                    // Getting the row spanning area (stored as e.g., 1:12)
                    // so that the last cells will be present, even if empty
                    $RowSpans = $this -> Worksheet -> getAttribute("spans");
                    if ($RowSpans)
                    {
                        $RowSpans = explode(":", $RowSpans);
                        $CurrentRowColumnCount = $RowSpans[1];
                    }
                    else
                    {
                        $CurrentRowColumnCount = 0;
                    }
    
                    if ($CurrentRowColumnCount > 0)
                    {
                        $this -> CurrentRow = array_fill(0, $CurrentRowColumnCount, "");
                    }
    
                    $this -> RowOpen = true;
                    break;
                }
            }
        }
    
        // Reading the necessary row, if found
        if ($this -> RowOpen)
        {
            // These two are needed to control for empty cells
            $MaxIndex = 0;
            $CellCount = 0;
    
            $CellHasSharedString = false;
    
            while ($this -> Valid = $this -> Worksheet -> read())
            {
                switch ($this -> Worksheet -> name)
                {
                    // End of row
                    case "row":
                        if ($this -> Worksheet -> nodeType == XMLReader::END_ELEMENT)
                        {
                            $this -> RowOpen = false;
                            break 2;
                        }
                        break;
                    // Cell
                    case "c":
                        // If it is a closing tag, skip it
                        if ($this -> Worksheet -> nodeType == XMLReader::END_ELEMENT)
                        {
                            continue;
                        }
    
                        $StyleId = (int)$this -> Worksheet -> getAttribute("s");
    
                        // Get the index of the cell
                        $Index = $this -> Worksheet -> getAttribute("r");
                        $Letter = preg_replace("{[^[:alpha:]]}S", "", $Index);
                        $Index = self::IndexFromColumnLetter($Letter);
    
                        // Determine cell type
                        if ($this -> Worksheet -> getAttribute("t") == self::CELL_TYPE_SHARED_STR)
                        {
                            $CellHasSharedString = true;
                        }
                        else
                        {
                            $CellHasSharedString = false;
                        }
    
                        $this -> CurrentRow[$Index] = "";
    
                        $CellCount++;
                        if ($Index > $MaxIndex)
                        {
                            $MaxIndex = $Index;
                        }
    
                        break;
                    // Cell value
                    case "v":
                    case "is":
                        if ($this -> Worksheet -> nodeType == XMLReader::END_ELEMENT)
                        {
                            continue;
                        }
    
                        $Value = $this -> Worksheet -> readString();
    
                        if ($CellHasSharedString)
                        {
                            $Value = $this -> GetSharedString($Value);
                        }
    
                        // Format value if necessary
                        if ($Value !== "" && $StyleId && isset($this -> Styles[$StyleId]))
                        {
                            $Value = $this -> FormatValue($Value, $StyleId);
                        }
                        elseif ($Value)
                        {
                            $Value = $this -> GeneralFormat($Value);
                        }
    
                        $this -> CurrentRow[$Index] = $Value;
                        break;
                }
            }
    
            // Adding empty cells, if necessary
            // Only empty cells inbetween and on the left side are added
            if ($MaxIndex + 1 > $CellCount)
            {
                $this -> CurrentRow = $this -> CurrentRow + array_fill(0, $MaxIndex + 1, "");
                ksort($this -> CurrentRow);
            }
        }
    
        return $this -> CurrentRow;
    }
    
    /** 
     * Return the identifying key of the current element.
     * Similar to the key() function for arrays in PHP
     *
     * @return mixed either an integer or a string
     */ 
    public function key()
    {
        return $this -> Index;
    }
    
    /** 
     * Check if there is a current element after calls to rewind() or next().
     * Used to check if we"ve iterated to the end of the collection
     *
     * @return boolean FALSE if there"s nothing more to iterate over
     */ 
    public function valid()
    {
        return $this -> Valid;
    }
    
    // !Countable interface method
    /**
     * Ostensibly should return the count of the contained items but this just returns the number
     * of rows read so far. It"s not really correct but at least coherent.
     */
    public function count()
    {
        return $this -> Index + 1;
    }
    
    /**
     * Takes the column letter and converts it to a numerical index (0-based)
     *
     * @param string Letter(s) to convert
     *
     * @return mixed Numeric index (0-based) or boolean false if it cannot be calculated
     */
    public static function IndexFromColumnLetter($Letter)
    {
        $Powers = array();
    
        $Letter = strtoupper($Letter);
    
        $Result = 0;
        for ($i = strlen($Letter) - 1, $j = 0; $i >= 0; $i--, $j++)
        {
            $Ord = ord($Letter[$i]) - 64;
            if ($Ord > 26)
            {
                // Something is very, very wrong
                return false;
            }
            $Result += $Ord * pow(26, $j);
        }
        return $Result - 1;
    }
    
    /**
     * Helper function for greatest common divisor calculation in case GMP extension is
     *  not enabled
     *
     * @param int Number #1
     * @param int Number #2
     *
     * @param int Greatest common divisor
     */
    public static function GCD($A, $B)
    {
        $A = abs($A);
        $B = abs($B);
        if ($A + $B == 0)
        {
            return 0;
        }
        else
        {
            $C = 1;
    
            while ($A > 0)
            {
                $C = $A;
                $A = $B % $A;
                $B = $C;
            }
    
            return $C;
        }
    }
    

    }
    ?>

example Call :

require("SpreadsheetReader.php");
$Reader = new SpreadsheetReader($inputfile);
$totalCount = 0;
$test=0;

foreach ($Reader as $Row)
{

    $count_rows = $Row;

    foreach($count_rows as $countRow){
        if($countRow != ""){
            $taa = explode(" ",$countRow);
            $tCount = count($taa);
            $result[]=$countRow." ";
        }

    }

}
return implode("",$result);
Enter fullscreen mode Exit fullscreen mode

FULL REFERENCE

https://github.com/nuovo/spreadsheet-reader

Top comments (0)