Libcroco
cr-input.c
Go to the documentation of this file.
00001 /* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 8-*- */
00002 
00003 /*
00004  * This file is part of The Croco Library
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of version 2.1 of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation.
00009  *
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU Lesser General Public License
00016  * along with this program; if not, write to the Free Software
00017  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00018  * USA
00019  *
00020  * Author: Dodji Seketeli
00021  * See COPYRIGHTS file for copyright information.
00022  */
00023 
00024 #include "stdio.h"
00025 #include <string.h>
00026 #include "cr-input.h"
00027 #include "cr-enc-handler.h"
00028 
00029 /**
00030  *@CRInput:
00031  *
00032  *The definition of the #CRInput class.
00033  */
00034 
00035 /*******************
00036  *Private type defs
00037  *******************/
00038 
00039 /**
00040  *The private attributes of
00041  *the #CRInputPriv class.
00042  */
00043 struct _CRInputPriv {
00044         /*
00045          *The input buffer
00046          */
00047         guchar *in_buf;
00048         gulong in_buf_size;
00049 
00050         gulong nb_bytes;
00051 
00052         /*
00053          *The index of the next byte
00054          *to be read.
00055          */
00056         gulong next_byte_index;
00057 
00058         /*
00059          *The current line number
00060          */
00061         gulong line;
00062 
00063         /*
00064          *The current col number
00065          */
00066         gulong col;
00067 
00068         gboolean end_of_line;
00069         gboolean end_of_input;
00070 
00071         /*
00072          *the reference count of this
00073          *instance.
00074          */
00075         guint ref_count;
00076         gboolean free_in_buf;
00077 };
00078 
/*Accessor to the private data of a #CRInput instance. */
#define PRIVATE(object) (object)->priv

/***************************
 *private constants
 **************************/

/*
 *Size, in bytes, of the chunks used to load a file in memory.
 *The expansion is parenthesized so that the macro behaves as a
 *single value in any expression (e.g. x / CR_INPUT_MEM_CHUNK_SIZE).
 */
#define CR_INPUT_MEM_CHUNK_SIZE (1024 * 4)
00085 
00086 static CRInput *cr_input_new_real (void);
00087 
00088 static CRInput *
00089 cr_input_new_real (void)
00090 {
00091         CRInput *result = NULL;
00092 
00093         result = g_try_malloc (sizeof (CRInput));
00094         if (!result) {
00095                 cr_utils_trace_info ("Out of memory");
00096                 return NULL;
00097         }
00098         memset (result, 0, sizeof (CRInput));
00099 
00100         PRIVATE (result) = g_try_malloc (sizeof (CRInputPriv));
00101         if (!PRIVATE (result)) {
00102                 cr_utils_trace_info ("Out of memory");
00103                 g_free (result);
00104                 return NULL;
00105         }
00106         memset (PRIVATE (result), 0, sizeof (CRInputPriv));
00107         PRIVATE (result)->free_in_buf = TRUE;
00108         return result;
00109 }
00110 
00111 /****************
00112  *Public methods
00113  ***************/
00114 
00115 /**
00116  * cr_input_new_from_buf:
00117  *@a_buf: the memory buffer to create the input stream from.
00118  *The #CRInput keeps this pointer so user should not free it !.
00119  *@a_len: the size of the input buffer.
00120  *@a_enc: the buffer's encoding.
00121  *@a_free_buf: if set to TRUE, this a_buf will be freed
00122  *at the destruction of this instance. If set to false, it is up
00123  *to the caller to free it.
00124  *
00125  *Creates a new input stream from a memory buffer.
00126  *Returns the newly built instance of #CRInput.
00127  */
00128 CRInput *
00129 cr_input_new_from_buf (guchar * a_buf,
00130                        gulong a_len,
00131                        enum CREncoding a_enc,
00132                        gboolean a_free_buf)
00133 {
00134         CRInput *result = NULL;
00135         enum CRStatus status = CR_OK;
00136         CREncHandler *enc_handler = NULL;
00137         gulong len = a_len;
00138 
00139         g_return_val_if_fail (a_buf, NULL);
00140 
00141         result = cr_input_new_real ();
00142         g_return_val_if_fail (result, NULL);
00143 
00144         /*transform the encoding in utf8 */
00145         if (a_enc != CR_UTF_8) {
00146                 enc_handler = cr_enc_handler_get_instance (a_enc);
00147                 if (!enc_handler) {
00148                         goto error;
00149                 }
00150 
00151                 status = cr_enc_handler_convert_input
00152                         (enc_handler, a_buf, &len,
00153                          &PRIVATE (result)->in_buf,
00154                          &PRIVATE (result)->in_buf_size);
00155                 if (status != CR_OK)
00156                         goto error;
00157                 PRIVATE (result)->free_in_buf = TRUE;
00158                 if (a_free_buf == TRUE && a_buf) {
00159                         g_free (a_buf) ;
00160                         a_buf = NULL ;
00161                 }                
00162                 PRIVATE (result)->nb_bytes = PRIVATE (result)->in_buf_size;
00163         } else {
00164                 PRIVATE (result)->in_buf = (guchar *) a_buf;
00165                 PRIVATE (result)->in_buf_size = a_len;
00166                 PRIVATE (result)->nb_bytes = a_len;
00167                 PRIVATE (result)->free_in_buf = a_free_buf;
00168         }
00169         PRIVATE (result)->line = 1;
00170         PRIVATE (result)->col =  0;
00171         return result;
00172 
00173  error:
00174         if (result) {
00175                 cr_input_destroy (result);
00176                 result = NULL;
00177         }
00178 
00179         return NULL;
00180 }
00181 
00182 /**
00183  * cr_input_new_from_uri:
00184  *@a_file_uri: the file to create *the input stream from.
00185  *@a_enc: the encoding of the file *to create the input from.
00186  *
00187  *Creates a new input stream from
00188  *a file.
00189  *
00190  *Returns the newly created input stream if
00191  *this method could read the file and create it,
00192  *NULL otherwise.
00193  */
00194 
00195 CRInput *
00196 cr_input_new_from_uri (const gchar * a_file_uri, enum CREncoding a_enc)
00197 {
00198         CRInput *result = NULL;
00199         enum CRStatus status = CR_OK;
00200         FILE *file_ptr = NULL;
00201         guchar tmp_buf[CR_INPUT_MEM_CHUNK_SIZE] = { 0 };
00202         gulong nb_read = 0,
00203                 len = 0,
00204                 buf_size = 0;
00205         gboolean loop = TRUE;
00206         guchar *buf = NULL;
00207 
00208         g_return_val_if_fail (a_file_uri, NULL);
00209 
00210         file_ptr = fopen (a_file_uri, "r");
00211 
00212         if (file_ptr == NULL) {
00213 
00214 #ifdef CR_DEBUG
00215                 cr_utils_trace_debug ("could not open file");
00216 #endif
00217                 g_warning ("Could not open file %s\n", a_file_uri);
00218 
00219                 return NULL;
00220         }
00221 
00222         /*load the file */
00223         while (loop) {
00224                 nb_read = fread (tmp_buf, 1 /*read bytes */ ,
00225                                  CR_INPUT_MEM_CHUNK_SIZE /*nb of bytes */ ,
00226                                  file_ptr);
00227 
00228                 if (nb_read != CR_INPUT_MEM_CHUNK_SIZE) {
00229                         /*we read less chars than we wanted */
00230                         if (feof (file_ptr)) {
00231                                 /*we reached eof */
00232                                 loop = FALSE;
00233                         } else {
00234                                 /*a pb occurred !! */
00235                                 cr_utils_trace_debug ("an io error occurred");
00236                                 status = CR_ERROR;
00237                                 goto cleanup;
00238                         }
00239                 }
00240 
00241                 if (status == CR_OK) {
00242                         /*read went well */
00243                         buf = g_realloc (buf, len + CR_INPUT_MEM_CHUNK_SIZE);
00244                         memcpy (buf + len, tmp_buf, nb_read);
00245                         len += nb_read;
00246                         buf_size += CR_INPUT_MEM_CHUNK_SIZE;
00247                 }
00248         }
00249 
00250         if (status == CR_OK) {
00251                 result = cr_input_new_from_buf (buf, len, a_enc, TRUE);
00252                 if (!result) {
00253                         goto cleanup;
00254                 }
00255                 /*
00256                  *we should  free buf here because it's own by CRInput.
00257                  *(see the last parameter of cr_input_new_from_buf().
00258                  */
00259                 buf = NULL ;
00260         }
00261 
00262  cleanup:
00263         if (file_ptr) {
00264                 fclose (file_ptr);
00265                 file_ptr = NULL;
00266         }
00267 
00268         if (buf) {
00269                 g_free (buf);
00270                 buf = NULL;
00271         }
00272 
00273         return result;
00274 }
00275 
00276 /**
00277  * cr_input_destroy:
00278  *@a_this: the current instance of #CRInput.
00279  *
00280  *The destructor of the #CRInput class.
00281  */
00282 void
00283 cr_input_destroy (CRInput * a_this)
00284 {
00285         if (a_this == NULL)
00286                 return;
00287 
00288         if (PRIVATE (a_this)) {
00289                 if (PRIVATE (a_this)->in_buf && PRIVATE (a_this)->free_in_buf) {
00290                         g_free (PRIVATE (a_this)->in_buf);
00291                         PRIVATE (a_this)->in_buf = NULL;
00292                 }
00293 
00294                 g_free (PRIVATE (a_this));
00295                 PRIVATE (a_this) = NULL;
00296         }
00297 
00298         g_free (a_this);
00299 }
00300 
00301 /**
00302  * cr_input_ref:
00303  *@a_this: the current instance of #CRInput.
00304  *
00305  *Increments the reference count of the current
00306  *instance of #CRInput.
00307  */
00308 void
00309 cr_input_ref (CRInput * a_this)
00310 {
00311         g_return_if_fail (a_this && PRIVATE (a_this));
00312 
00313         PRIVATE (a_this)->ref_count++;
00314 }
00315 
00316 /**
00317  * cr_input_unref:
00318  *@a_this: the current instance of #CRInput.
00319  *
00320  *Decrements the reference count of this instance
00321  *of #CRInput. If the reference count goes down to
00322  *zero, this instance is destroyed.
00323  *
00324  * Returns TRUE if the instance of #CRInput got destroyed, false otherwise.
00325  */
00326 gboolean
00327 cr_input_unref (CRInput * a_this)
00328 {
00329         g_return_val_if_fail (a_this && PRIVATE (a_this), FALSE);
00330 
00331         if (PRIVATE (a_this)->ref_count) {
00332                 PRIVATE (a_this)->ref_count--;
00333         }
00334 
00335         if (PRIVATE (a_this)->ref_count == 0) {
00336                 cr_input_destroy (a_this);
00337                 return TRUE;
00338         }
00339         return FALSE;
00340 }
00341 
00342 /**
00343  * cr_input_end_of_input:
00344  *@a_this: the current instance of #CRInput.
00345  *@a_end_of_input: out parameter. Is set to TRUE if
00346  *the current instance has reached the end of its input buffer,
00347  *FALSE otherwise.
00348  *
00349  *Tests wether the current instance of
00350  *#CRInput has reached its input buffer.
00351  *
00352  * Returns CR_OK upon successful completion, an error code otherwise.
00353  * Note that all the out parameters of this method are valid if
00354  * and only if this method returns CR_OK.
00355  */
00356 enum CRStatus
00357 cr_input_end_of_input (CRInput const * a_this, gboolean * a_end_of_input)
00358 {
00359         g_return_val_if_fail (a_this && PRIVATE (a_this)
00360                               && a_end_of_input, CR_BAD_PARAM_ERROR);
00361 
00362         *a_end_of_input = (PRIVATE (a_this)->next_byte_index
00363                            >= PRIVATE (a_this)->in_buf_size) ? TRUE : FALSE;
00364 
00365         return CR_OK;
00366 }
00367 
00368 /**
00369  * cr_input_get_nb_bytes_left:
00370  *@a_this: the current instance of #CRInput.
00371  *
00372  *Returns the number of bytes left in the input stream
00373  *before the end, -1 in case of error.
00374  */
00375 glong
00376 cr_input_get_nb_bytes_left (CRInput const * a_this)
00377 {
00378         g_return_val_if_fail (a_this && PRIVATE (a_this), -1);
00379         g_return_val_if_fail (PRIVATE (a_this)->nb_bytes
00380                               <= PRIVATE (a_this)->in_buf_size, -1);
00381         g_return_val_if_fail (PRIVATE (a_this)->next_byte_index
00382                               <= PRIVATE (a_this)->nb_bytes, -1);
00383 
00384         if (PRIVATE (a_this)->end_of_input)
00385                 return 0;
00386 
00387         return PRIVATE (a_this)->nb_bytes - PRIVATE (a_this)->next_byte_index;
00388 }
00389 
00390 /**
00391  * cr_input_read_byte:
00392  *@a_this: the current instance of #CRInput.
00393  *@a_byte: out parameter the returned byte.
00394  *
00395  *Gets the next byte of the input.
00396  *Updates the state of the input so that
00397  *the next invocation of this method  returns
00398  *the next coming byte.
00399  *
00400  *Returns CR_OK upon successful completion, an error code
00401  *otherwise. All the out parameters of this method are valid if
00402  *and only if this method returns CR_OK.
00403  */
00404 enum CRStatus
00405 cr_input_read_byte (CRInput * a_this, guchar * a_byte)
00406 {
00407         g_return_val_if_fail (a_this && PRIVATE (a_this)
00408                               && a_byte, CR_BAD_PARAM_ERROR);
00409 
00410         g_return_val_if_fail (PRIVATE (a_this)->next_byte_index <=
00411                               PRIVATE (a_this)->nb_bytes, CR_BAD_PARAM_ERROR);
00412 
00413         if (PRIVATE (a_this)->end_of_input == TRUE)
00414                 return CR_END_OF_INPUT_ERROR;
00415 
00416         *a_byte = PRIVATE (a_this)->in_buf[PRIVATE (a_this)->next_byte_index];
00417 
00418         if (PRIVATE (a_this)->nb_bytes -
00419             PRIVATE (a_this)->next_byte_index < 2) {
00420                 PRIVATE (a_this)->end_of_input = TRUE;
00421         } else {
00422                 PRIVATE (a_this)->next_byte_index++;
00423         }
00424 
00425         return CR_OK;
00426 }
00427 
00428 /**
00429  * cr_input_read_char:
00430  *@a_this: the current instance of CRInput.
00431  *@a_char: out parameter. The read character.
00432  *
00433  *Reads an unicode character from the current instance of
00434  *#CRInput.
00435  *
00436  *Returns CR_OK upon successful completion, an error code
00437  *otherwise.
00438  */
00439 enum CRStatus
00440 cr_input_read_char (CRInput * a_this, guint32 * a_char)
00441 {
00442         enum CRStatus status = CR_OK;
00443         gulong consumed = 0,
00444                 nb_bytes_left = 0;
00445 
00446         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_char,
00447                               CR_BAD_PARAM_ERROR);
00448 
00449         if (PRIVATE (a_this)->end_of_input == TRUE)
00450                 return CR_END_OF_INPUT_ERROR;
00451 
00452         nb_bytes_left = cr_input_get_nb_bytes_left (a_this);
00453 
00454         if (nb_bytes_left < 1) {
00455                 return CR_END_OF_INPUT_ERROR;
00456         }
00457 
00458         status = cr_utils_read_char_from_utf8_buf
00459                 (PRIVATE (a_this)->in_buf
00460                  +
00461                  PRIVATE (a_this)->next_byte_index,
00462                  nb_bytes_left, a_char, &consumed);
00463 
00464         if (status == CR_OK) {
00465                 /*update next byte index */
00466                 PRIVATE (a_this)->next_byte_index += consumed;
00467 
00468                 /*update line and column number */
00469                 if (PRIVATE (a_this)->end_of_line == TRUE) {
00470                         PRIVATE (a_this)->col = 1;
00471                         PRIVATE (a_this)->line++;
00472                         PRIVATE (a_this)->end_of_line = FALSE;
00473                 } else if (*a_char != '\n') {
00474                         PRIVATE (a_this)->col++;
00475                 }
00476 
00477                 if (*a_char == '\n') {
00478                         PRIVATE (a_this)->end_of_line = TRUE;
00479                 }
00480 
00481         }
00482 
00483         return status;
00484 }
00485 
00486 /**
00487  * cr_input_set_line_num:
00488  *@a_this: the "this pointer" of the current instance of #CRInput.
00489  *@a_line_num: the new line number.
00490  *
00491  *Setter of the current line number.
00492  *
00493  *Return CR_OK upon successful completion, an error code otherwise.
00494  */
00495 enum CRStatus
00496 cr_input_set_line_num (CRInput * a_this, glong a_line_num)
00497 {
00498         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
00499 
00500         PRIVATE (a_this)->line = a_line_num;
00501 
00502         return CR_OK;
00503 }
00504 
00505 /**
00506  * cr_input_get_line_num:
00507  *@a_this: the "this pointer" of the current instance of #CRInput.
00508  *@a_line_num: the returned line number.
00509  *
00510  *Getter of the current line number.
00511  *
00512  *Returns CR_OK upon successful completion, an error code otherwise.
00513  */
00514 enum CRStatus
00515 cr_input_get_line_num (CRInput const * a_this, glong * a_line_num)
00516 {
00517         g_return_val_if_fail (a_this && PRIVATE (a_this)
00518                               && a_line_num, CR_BAD_PARAM_ERROR);
00519 
00520         *a_line_num = PRIVATE (a_this)->line;
00521 
00522         return CR_OK;
00523 }
00524 
00525 /**
00526  * cr_input_set_column_num:
00527  *@a_this: the "this pointer" of the current instance of #CRInput.
00528  *@a_col: the new column number.
00529  *
00530  *Setter of the current column number.
00531  *
00532  *Returns CR_OK upon successful completion, an error code otherwise.
00533  */
00534 enum CRStatus
00535 cr_input_set_column_num (CRInput * a_this, glong a_col)
00536 {
00537         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
00538 
00539         PRIVATE (a_this)->col = a_col;
00540 
00541         return CR_OK;
00542 }
00543 
00544 /**
00545  * cr_input_get_column_num:
00546  *@a_this: the "this pointer" of the current instance of #CRInput.
00547  *@a_col: out parameter
00548  *
00549  *Getter of the current column number.
00550  *
00551  *Returns CR_OK upon successful completion, an error code otherwise.
00552  */
00553 enum CRStatus
00554 cr_input_get_column_num (CRInput const * a_this, glong * a_col)
00555 {
00556         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_col,
00557                               CR_BAD_PARAM_ERROR);
00558 
00559         *a_col = PRIVATE (a_this)->col;
00560 
00561         return CR_OK;
00562 }
00563 
00564 /**
00565  * cr_input_increment_line_num:
00566  *@a_this: the "this pointer" of the current instance of #CRInput.
00567  *@a_increment: the increment to add to the line number.
00568  *
00569  *Increments the current line number.
00570  *
00571  *Returns CR_OK upon successful completion, an error code otherwise.
00572  */
00573 enum CRStatus
00574 cr_input_increment_line_num (CRInput * a_this, glong a_increment)
00575 {
00576         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
00577 
00578         PRIVATE (a_this)->line += a_increment;
00579 
00580         return CR_OK;
00581 }
00582 
00583 /**
00584  * cr_input_increment_col_num:
00585  *@a_this: the "this pointer" of the current instance of #CRInput.
00586  *@a_increment: the increment to add to the column number.
00587  *
00588  *Increments the current column number.
00589  *
00590  *Returns CR_OK upon successful completion, an error code otherwise.
00591  */
00592 enum CRStatus
00593 cr_input_increment_col_num (CRInput * a_this, glong a_increment)
00594 {
00595         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
00596 
00597         PRIVATE (a_this)->col += a_increment;
00598 
00599         return CR_OK;
00600 }
00601 
00602 /**
00603  * cr_input_consume_char:
00604  *@a_this: the this pointer.
00605  *@a_char: the character to consume. If set to zero,
00606  *consumes any character.
00607  *
00608  *Consumes the next character of the input stream if
00609  *and only if that character equals a_char.
00610  *
00611  *Returns CR_OK upon successful completion, CR_PARSING_ERROR if
00612  *next char is different from a_char, an other error code otherwise
00613  */
00614 enum CRStatus
00615 cr_input_consume_char (CRInput * a_this, guint32 a_char)
00616 {
00617         guint32 c;
00618         enum CRStatus status;
00619 
00620         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
00621 
00622         if ((status = cr_input_peek_char (a_this, &c)) != CR_OK) {
00623                 return status;
00624         }
00625 
00626         if (c == a_char || a_char == 0) {
00627                 status = cr_input_read_char (a_this, &c);
00628         } else {
00629                 return CR_PARSING_ERROR;
00630         }
00631 
00632         return status;
00633 }
00634 
00635 /**
00636  * cr_input_consume_chars:
00637  *@a_this: the this pointer of the current instance of #CRInput.
00638  *@a_char: the character to consume.
00639  *@a_nb_char: in/out parameter. The number of characters to consume.
00640  *If set to a negative value, the function will consume all the occurences
00641  *of a_char found.
00642  *After return, if the return value equals CR_OK, this variable contains 
00643  *the number of characters actually consumed.
00644  *
00645  *Consumes up to a_nb_char occurences of the next contiguous characters 
00646  *which equal a_char. Note that the next character of the input stream
00647  **MUST* equal a_char to trigger the consumption, or else, the error
00648  *code CR_PARSING_ERROR is returned.
00649  *If the number of contiguous characters that equals a_char is less than
00650  *a_nb_char, then this function consumes all the characters it can consume.
00651  * 
00652  *Returns CR_OK if at least one character has been consumed, an error code
00653  *otherwise.
00654  */
00655 enum CRStatus
00656 cr_input_consume_chars (CRInput * a_this, guint32 a_char, gulong * a_nb_char)
00657 {
00658         enum CRStatus status = CR_OK;
00659         gulong nb_consumed = 0;
00660 
00661         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_nb_char,
00662                               CR_BAD_PARAM_ERROR);
00663 
00664         g_return_val_if_fail (a_char != 0 || a_nb_char != NULL,
00665                               CR_BAD_PARAM_ERROR);
00666 
00667         for (nb_consumed = 0; ((status == CR_OK)
00668                                && (*a_nb_char > 0
00669                                    && nb_consumed < *a_nb_char));
00670              nb_consumed++) {
00671                 status = cr_input_consume_char (a_this, a_char);
00672         }
00673 
00674         *a_nb_char = nb_consumed;
00675 
00676         if ((nb_consumed > 0)
00677             && ((status == CR_PARSING_ERROR)
00678                 || (status == CR_END_OF_INPUT_ERROR))) {
00679                 status = CR_OK;
00680         }
00681 
00682         return status;
00683 }
00684 
00685 /**
00686  * cr_input_consume_white_spaces:
00687  *@a_this: the "this pointer" of the current instance of #CRInput.
00688  *@a_nb_chars: in/out parameter. The number of white spaces to
00689  *consume. After return, holds the number of white spaces actually consumed.
00690  *
00691  *Same as cr_input_consume_chars() but this one consumes white
00692  *spaces.
00693  *
00694  *Returns CR_OK upon successful completion, an error code otherwise.
00695  */
00696 enum CRStatus
00697 cr_input_consume_white_spaces (CRInput * a_this, gulong * a_nb_chars)
00698 {
00699         enum CRStatus status = CR_OK;
00700         guint32 cur_char = 0,
00701                 nb_consumed = 0;
00702 
00703         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_nb_chars,
00704                               CR_BAD_PARAM_ERROR);
00705 
00706         for (nb_consumed = 0;
00707              ((*a_nb_chars > 0) && (nb_consumed < *a_nb_chars));
00708              nb_consumed++) {
00709                 status = cr_input_peek_char (a_this, &cur_char);
00710                 if (status != CR_OK)
00711                         break;
00712 
00713                 /*if the next char is a white space, consume it ! */
00714                 if (cr_utils_is_white_space (cur_char) == TRUE) {
00715                         status = cr_input_read_char (a_this, &cur_char);
00716                         if (status != CR_OK)
00717                                 break;
00718                         continue;
00719                 }
00720 
00721                 break;
00722 
00723         }
00724 
00725         *a_nb_chars = (gulong) nb_consumed;
00726 
00727         if (nb_consumed && status == CR_END_OF_INPUT_ERROR) {
00728                 status = CR_OK;
00729         }
00730 
00731         return status;
00732 }
00733 
00734 /**
00735  * cr_input_peek_char:
00736  *@a_this: the current instance of #CRInput.
00737  *@a_char: out parameter. The returned character.
00738  *
00739  *Same as cr_input_read_char() but does not update the
00740  *internal state of the input stream. The next call
00741  *to cr_input_peek_char() or cr_input_read_char() will thus
00742  *return the same character as the current one.
00743  *
00744  *Returns CR_OK upon successful completion, an error code
00745  *otherwise.
00746  */
00747 enum CRStatus
00748 cr_input_peek_char (CRInput const * a_this, guint32 * a_char)
00749 {
00750         enum CRStatus status = CR_OK;
00751         gulong consumed = 0,
00752                 nb_bytes_left = 0;
00753 
00754         g_return_val_if_fail (a_this && PRIVATE (a_this)
00755                               && a_char, CR_BAD_PARAM_ERROR);
00756 
00757         if (PRIVATE (a_this)->next_byte_index >=
00758             PRIVATE (a_this)->in_buf_size) {
00759                 return CR_END_OF_INPUT_ERROR;
00760         }
00761 
00762         nb_bytes_left = cr_input_get_nb_bytes_left (a_this);
00763 
00764         if (nb_bytes_left < 1) {
00765                 return CR_END_OF_INPUT_ERROR;
00766         }
00767 
00768         status = cr_utils_read_char_from_utf8_buf
00769                 (PRIVATE (a_this)->in_buf +
00770                  PRIVATE (a_this)->next_byte_index,
00771                  nb_bytes_left, a_char, &consumed);
00772 
00773         return status;
00774 }
00775 
00776 /**
00777  * cr_input_peek_byte:
00778  *@a_this: the current instance of #CRInput.
00779  *@a_origin: the origin to consider in the calculation
00780  *of the position of the byte to peek.
00781  *@a_offset: the offset of the byte to peek, starting from
00782  *the origin specified by a_origin.
00783  *@a_byte: out parameter the peeked byte.
00784  *
00785  *Gets a byte from the input stream,
00786  *starting from the current position in the input stream.
00787  *Unlike cr_input_peek_next_byte() this method
00788  *does not update the state of the current input stream.
00789  *Subsequent calls to cr_input_peek_byte with the same arguments
00790  *will return the same byte.
00791  *
00792  *Returns CR_OK upon successful completion or,
00793  *CR_BAD_PARAM_ERROR if at least one of the parameters is invalid;
00794  *CR_OUT_OF_BOUNDS_ERROR if the indexed byte is out of bounds.
00795  */
00796 enum CRStatus
00797 cr_input_peek_byte (CRInput const * a_this, enum CRSeekPos a_origin,
00798                     gulong a_offset, guchar * a_byte)
00799 {
00800         gulong abs_offset = 0;
00801 
00802         g_return_val_if_fail (a_this && PRIVATE (a_this)
00803                               && a_byte, CR_BAD_PARAM_ERROR);
00804 
00805         switch (a_origin) {
00806 
00807         case CR_SEEK_CUR:
00808                 abs_offset = PRIVATE (a_this)->next_byte_index - 1 + a_offset;
00809                 break;
00810 
00811         case CR_SEEK_BEGIN:
00812                 abs_offset = a_offset;
00813                 break;
00814 
00815         case CR_SEEK_END:
00816                 abs_offset = PRIVATE (a_this)->in_buf_size - 1 - a_offset;
00817                 break;
00818 
00819         default:
00820                 return CR_BAD_PARAM_ERROR;
00821         }
00822 
00823         if (abs_offset < PRIVATE (a_this)->in_buf_size) {
00824 
00825                 *a_byte = PRIVATE (a_this)->in_buf[abs_offset];
00826 
00827                 return CR_OK;
00828 
00829         } else {
00830                 return CR_END_OF_INPUT_ERROR;
00831         }
00832 }
00833 
00834 /**
00835  * cr_input_peek_byte2:
00836  *@a_this: the current byte input stream.
00837  *@a_offset: the offset of the byte to peek, starting
00838  *from the current input position pointer.
00839  *@a_eof: out parameter. Is set to true is we reach end of
00840  *stream. If set to NULL by the caller, this parameter is not taken
00841  *in account.
00842  *
00843  *Same as cr_input_peek_byte() but with a simplified
00844  *interface.
00845  *
00846  *Returns the read byte or 0 if something bad happened.
00847  */
00848 guchar
00849 cr_input_peek_byte2 (CRInput const * a_this, gulong a_offset, gboolean * a_eof)
00850 {
00851         guchar result = 0;
00852         enum CRStatus status = CR_ERROR;
00853 
00854         g_return_val_if_fail (a_this && PRIVATE (a_this), 0);
00855 
00856         if (a_eof)
00857                 *a_eof = FALSE;
00858 
00859         status = cr_input_peek_byte (a_this, CR_SEEK_CUR, a_offset, &result);
00860 
00861         if ((status == CR_END_OF_INPUT_ERROR)
00862             && a_eof)
00863                 *a_eof = TRUE;
00864 
00865         return result;
00866 }
00867 
00868 /**
00869  * cr_input_get_byte_addr:
00870  *@a_this: the current instance of #CRInput.
00871  *@a_offset: the offset of the byte in the input stream starting
00872  *from the beginning of the stream.
00873  *
00874  *Gets the memory address of the byte located at a given offset
00875  *in the input stream.
00876  *
00877  *Returns the address, otherwise NULL if an error occurred.
00878  */
00879 guchar *
00880 cr_input_get_byte_addr (CRInput * a_this, gulong a_offset)
00881 {
00882         g_return_val_if_fail (a_this && PRIVATE (a_this), NULL);
00883 
00884         if (a_offset >= PRIVATE (a_this)->nb_bytes) {
00885                 return NULL;
00886         }
00887 
00888         return &PRIVATE (a_this)->in_buf[a_offset];
00889 }
00890 
00891 /**
00892  * cr_input_get_cur_byte_addr:
00893  *@a_this: the current input stream
00894  *@a_offset: out parameter. The returned address.
00895  *
00896  *Gets the address of the current character pointer.
00897  *
00898  *Returns CR_OK upon successful completion, an error code otherwise.
00899  */
00900 enum CRStatus
00901 cr_input_get_cur_byte_addr (CRInput * a_this, guchar ** a_offset)
00902 {
00903         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_offset,
00904                               CR_BAD_PARAM_ERROR);
00905 
00906         if (!PRIVATE (a_this)->next_byte_index) {
00907                 return CR_START_OF_INPUT_ERROR;
00908         }
00909 
00910         *a_offset = cr_input_get_byte_addr
00911                 (a_this, PRIVATE (a_this)->next_byte_index - 1);
00912 
00913         return CR_OK;
00914 }
00915 
00916 /**
00917  * cr_input_seek_index:
00918  *@a_this: the current instance of #CRInput.
00919  *@a_origin: the origin to consider during the calculation
00920  *of the absolute position of the new "current byte index".
00921  *@a_pos: the relative offset of the new "current byte index."
00922  *This offset is relative to the origin a_origin.
00923  *
00924  *Sets the "current byte index" of the current instance
00925  *of #CRInput. Next call to cr_input_get_byte() will return
00926  *the byte next after the new "current byte index".
00927  *
00928  *Returns CR_OK upon successful completion otherwise returns
00929  *CR_BAD_PARAM_ERROR if at least one of the parameters is not valid
00930  *or CR_OUT_BOUNDS_ERROR in case of error.
00931  */
00932 enum CRStatus
00933 cr_input_seek_index (CRInput * a_this, enum CRSeekPos a_origin, gint a_pos)
00934 {
00935 
00936         glong abs_offset = 0;
00937 
00938         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
00939 
00940         switch (a_origin) {
00941 
00942         case CR_SEEK_CUR:
00943                 abs_offset = PRIVATE (a_this)->next_byte_index - 1 + a_pos;
00944                 break;
00945 
00946         case CR_SEEK_BEGIN:
00947                 abs_offset = a_pos;
00948                 break;
00949 
00950         case CR_SEEK_END:
00951                 abs_offset = PRIVATE (a_this)->in_buf_size - 1 - a_pos;
00952                 break;
00953 
00954         default:
00955                 return CR_BAD_PARAM_ERROR;
00956         }
00957 
00958         if ((abs_offset > 0)
00959             && (gulong) abs_offset < PRIVATE (a_this)->nb_bytes) {
00960 
00961                 /*update the input stream's internal state */
00962                 PRIVATE (a_this)->next_byte_index = abs_offset + 1;
00963 
00964                 return CR_OK;
00965         }
00966 
00967         return CR_OUT_OF_BOUNDS_ERROR;
00968 }
00969 
00970 /**
00971  * cr_input_get_cur_pos:
00972  *@a_this: the current instance of #CRInput.
00973  *@a_pos: out parameter. The returned position.
00974  *
00975  *Gets the position of the "current byte index" which
00976  *is basically the position of the last returned byte in the
00977  *input stream.
00978  *
00979  *Returns CR_OK upon successful completion. Otherwise,
00980  *CR_BAD_PARAMETER_ERROR if at least one of the arguments is invalid.
00981  *CR_START_OF_INPUT if no call to either cr_input_read_byte()
00982  *or cr_input_seek_index() have been issued before calling 
00983  *cr_input_get_cur_pos()
00984  *Note that the out parameters of this function are valid if and only if this
00985  *function returns CR_OK.
00986  */
00987 enum CRStatus
00988 cr_input_get_cur_pos (CRInput const * a_this, CRInputPos * a_pos)
00989 {
00990         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_pos,
00991                               CR_BAD_PARAM_ERROR);
00992 
00993         a_pos->next_byte_index = PRIVATE (a_this)->next_byte_index;
00994         a_pos->line = PRIVATE (a_this)->line;
00995         a_pos->col = PRIVATE (a_this)->col;
00996         a_pos->end_of_line = PRIVATE (a_this)->end_of_line;
00997         a_pos->end_of_file = PRIVATE (a_this)->end_of_input;
00998 
00999         return CR_OK;
01000 }
01001 
01002 /**
01003  * cr_input_get_parsing_location:
01004  *@a_this: the current instance of #CRInput
01005  *@a_loc: the set parsing location.
01006  *
01007  *Gets the current parsing location.
01008  *The Parsing location is a public datastructure that
01009  *represents the current line/column/byte offset/ in the input
01010  *stream.
01011  *
01012  *Returns CR_OK upon successful completion, an error
01013  *code otherwise.
01014  */
01015 enum CRStatus
01016 cr_input_get_parsing_location (CRInput const *a_this,
01017                                CRParsingLocation *a_loc)
01018 {
01019         g_return_val_if_fail (a_this 
01020                               && PRIVATE (a_this)
01021                               && a_loc, 
01022                               CR_BAD_PARAM_ERROR) ;
01023 
01024         a_loc->line = PRIVATE (a_this)->line ;
01025         a_loc->column = PRIVATE (a_this)->col ;
01026         if (PRIVATE (a_this)->next_byte_index) {
01027                 a_loc->byte_offset = PRIVATE (a_this)->next_byte_index - 1 ;
01028         } else {
01029                 a_loc->byte_offset = PRIVATE (a_this)->next_byte_index  ;
01030         }
01031         return CR_OK ;
01032 }
01033 
01034 /**
01035  * cr_input_get_cur_index:
01036  *@a_this: the "this pointer" of the current instance of
01037  *#CRInput
01038  *@a_index: out parameter. The returned index.
01039  *
01040  *Getter of the next byte index. 
01041  *It actually returns the index of the
01042  *next byte to be read.
01043  *
01044  *Returns CR_OK upon successful completion, an error code
01045  *otherwise.
01046  */
01047 enum CRStatus
01048 cr_input_get_cur_index (CRInput const * a_this, glong * a_index)
01049 {
01050         g_return_val_if_fail (a_this && PRIVATE (a_this)
01051                               && a_index, CR_BAD_PARAM_ERROR);
01052 
01053         *a_index = PRIVATE (a_this)->next_byte_index;
01054 
01055         return CR_OK;
01056 }
01057 
01058 /**
01059  * cr_input_set_cur_index:
01060  *@a_this: the "this pointer" of the current instance
01061  *of #CRInput .
01062  *@a_index: the new index to set.
01063  *
01064  *Setter of the next byte index.
01065  *It sets the index of the next byte to be read.
01066  *
01067  *Returns CR_OK upon successful completion, an error code otherwise.
01068  */
01069 enum CRStatus
01070 cr_input_set_cur_index (CRInput * a_this, glong a_index)
01071 {
01072         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
01073 
01074         PRIVATE (a_this)->next_byte_index = a_index;
01075 
01076         return CR_OK;
01077 }
01078 
01079 /**
01080  * cr_input_set_end_of_file:
01081  *@a_this: the current instance of #CRInput.
01082  *@a_eof: the new end of file flag.
01083  *
01084  *Sets the end of file flag.
01085  *
01086  *Returns CR_OK upon successful completion, an error code otherwise.
01087  */
01088 enum CRStatus
01089 cr_input_set_end_of_file (CRInput * a_this, gboolean a_eof)
01090 {
01091         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
01092 
01093         PRIVATE (a_this)->end_of_input = a_eof;
01094 
01095         return CR_OK;
01096 }
01097 
01098 /**
01099  * cr_input_get_end_of_file:
01100  *@a_this: the current instance of #CRInput.
01101  *@a_eof: out parameter the place to put the end of
01102  *file flag.
01103  *
01104  *Gets the end of file flag.
01105  *
01106  *Returns CR_OK upon successful completion, an error code otherwise.
01107  */
01108 enum CRStatus
01109 cr_input_get_end_of_file (CRInput const * a_this, gboolean * a_eof)
01110 {
01111         g_return_val_if_fail (a_this && PRIVATE (a_this)
01112                               && a_eof, CR_BAD_PARAM_ERROR);
01113 
01114         *a_eof = PRIVATE (a_this)->end_of_input;
01115 
01116         return CR_OK;
01117 }
01118 
01119 /**
01120  * cr_input_set_end_of_line:
01121  *@a_this: the current instance of #CRInput.
01122  *@a_eol: the new end of line flag.
01123  *
01124  *Sets the end of line flag.
01125  *
01126  *Returns CR_OK upon successful completion, an error code
01127  *otherwise.
01128  */
01129 enum CRStatus
01130 cr_input_set_end_of_line (CRInput * a_this, gboolean a_eol)
01131 {
01132         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
01133 
01134         PRIVATE (a_this)->end_of_line = a_eol;
01135 
01136         return CR_OK;
01137 }
01138 
01139 /**
01140  * cr_input_get_end_of_line:
01141  *@a_this: the current instance of #CRInput
01142  *@a_eol: out parameter. The place to put
01143  *the returned flag
01144  *
01145  *Gets the end of line flag of the current input.
01146  *
01147  *Returns CR_OK upon successful completion, an error code
01148  *otherwise.
01149  */
01150 enum CRStatus
01151 cr_input_get_end_of_line (CRInput const * a_this, gboolean * a_eol)
01152 {
01153         g_return_val_if_fail (a_this && PRIVATE (a_this)
01154                               && a_eol, CR_BAD_PARAM_ERROR);
01155 
01156         *a_eol = PRIVATE (a_this)->end_of_line;
01157 
01158         return CR_OK;
01159 }
01160 
01161 /**
01162  * cr_input_set_cur_pos:
01163  *@a_this: the "this pointer" of the current instance of
01164  *#CRInput.
01165  *@a_pos: the new position.
01166  *
01167  *Sets the current position in the input stream.
01168  *
01169  * Returns CR_OK upon successful completion, an error code otherwise.
01170  */
01171 enum CRStatus
01172 cr_input_set_cur_pos (CRInput * a_this, CRInputPos const * a_pos)
01173 {
01174         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_pos,
01175                               CR_BAD_PARAM_ERROR);
01176 
01177         cr_input_set_column_num (a_this, a_pos->col);
01178         cr_input_set_line_num (a_this, a_pos->line);
01179         cr_input_set_cur_index (a_this, a_pos->next_byte_index);
01180         cr_input_set_end_of_line (a_this, a_pos->end_of_line);
01181         cr_input_set_end_of_file (a_this, a_pos->end_of_file);
01182 
01183         return CR_OK;
01184 }