Libcroco
|
00001 /* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 8 -*- */ 00002 00003 /* 00004 * This file is part of The Croco Library 00005 * 00006 * Copyright (C) 2002-2003 Dodji Seketeli <dodji@seketeli.org> 00007 * 00008 * This program is free software; you can redistribute it and/or 00009 * modify it under the terms of version 2.1 of the GNU Lesser General Public 00010 * License as published by the Free Software Foundation. 00011 * 00012 * This program is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public License 00018 * along with this program; if not, write to the Free Software 00019 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00020 * USA 00021 */ 00022 00023 /* 00024 *$Id$ 00025 */ 00026 00027 /** 00028 *@file 00029 *The definition of the #CREncHandler class. 00030 */ 00031 00032 #include "cr-enc-handler.h" 00033 #include "cr-utils.h" 00034 00035 #include <string.h> 00036 00037 struct CREncAlias { 00038 const gchar *name; 00039 enum CREncoding encoding; 00040 }; 00041 00042 static struct CREncAlias gv_default_aliases[] = { 00043 {"UTF-8", CR_UTF_8}, 00044 {"UTF_8", CR_UTF_8}, 00045 {"UTF8", CR_UTF_8}, 00046 {"UTF-16", CR_UTF_16}, 00047 {"UTF_16", CR_UTF_16}, 00048 {"UTF16", CR_UTF_16}, 00049 {"UCS1", CR_UCS_1}, 00050 {"UCS-1", CR_UCS_1}, 00051 {"UCS_1", CR_UCS_1}, 00052 {"ISO-8859-1", CR_UCS_1}, 00053 {"ISO_8859-1", CR_UCS_1}, 00054 {"UCS-1", CR_UCS_1}, 00055 {"UCS_1", CR_UCS_1}, 00056 {"UCS4", CR_UCS_4}, 00057 {"UCS-4", CR_UCS_4}, 00058 {"UCS_4", CR_UCS_4}, 00059 {"ASCII", CR_ASCII}, 00060 {0, 0} 00061 }; 00062 00063 static CREncHandler gv_default_enc_handlers[] = { 00064 {CR_UCS_1, cr_utils_ucs1_to_utf8, cr_utils_utf8_to_ucs1, 00065 cr_utils_ucs1_str_len_as_utf8, cr_utils_utf8_str_len_as_ucs1}, 00066 00067 {CR_ISO_8859_1, cr_utils_ucs1_to_utf8, cr_utils_utf8_to_ucs1, 00068 cr_utils_ucs1_str_len_as_utf8, cr_utils_utf8_str_len_as_ucs1}, 00069 00070 {CR_ASCII, cr_utils_ucs1_to_utf8, cr_utils_utf8_to_ucs1, 00071 cr_utils_ucs1_str_len_as_utf8, cr_utils_utf8_str_len_as_ucs1}, 00072 00073 {0, NULL, NULL, NULL, NULL} 00074 }; 00075 00076 /** 00077 * cr_enc_handler_get_instance: 00078 *@a_enc: the encoding of the Handler. 00079 * 00080 *Gets the instance of encoding handler. 00081 *This function implements a singleton pattern. 00082 * 00083 *Returns the instance of #CREncHandler. 00084 */ 00085 CREncHandler * 00086 cr_enc_handler_get_instance (enum CREncoding a_enc) 00087 { 00088 gulong i = 0; 00089 00090 for (i = 0; gv_default_enc_handlers[i].encoding; i++) { 00091 if (gv_default_enc_handlers[i].encoding == a_enc) { 00092 return (CREncHandler *) & gv_default_enc_handlers[i]; 00093 } 00094 } 00095 00096 return NULL; 00097 } 00098 00099 /** 00100 * cr_enc_handler_resolve_enc_alias: 00101 *@a_alias_name: the encoding name. 00102 *@a_enc: output param. The returned encoding type 00103 *or 0 if the alias is not supported. 00104 * 00105 *Given an encoding name (called an alias name) 00106 *the function returns the matching encoding type. 00107 * 00108 *Returns CR_OK upon successfull completion, an error code otherwise. 00109 */ 00110 enum CRStatus 00111 cr_enc_handler_resolve_enc_alias (const guchar * a_alias_name, 00112 enum CREncoding *a_enc) 00113 { 00114 gulong i = 0; 00115 guchar *alias_name_up = NULL; 00116 enum CRStatus status = CR_ENCODING_NOT_FOUND_ERROR; 00117 00118 g_return_val_if_fail (a_alias_name != NULL, CR_BAD_PARAM_ERROR); 00119 00120 alias_name_up = (guchar *) g_ascii_strup ((const gchar *) a_alias_name, -1); 00121 00122 for (i = 0; gv_default_aliases[i].name; i++) { 00123 if (!strcmp (gv_default_aliases[i].name, (const gchar *) alias_name_up)) { 00124 *a_enc = gv_default_aliases[i].encoding; 00125 status = CR_OK; 00126 break; 00127 } 00128 } 00129 00130 return status; 00131 } 00132 00133 /** 00134 * cr_enc_handler_convert_input: 00135 *@a_this: the current instance of #CREncHandler. 00136 *@a_in: the input buffer to convert. 00137 *@a_in_len: in/out parameter. The len of the input 00138 *buffer to convert. After return, contains the number of 00139 *bytes actually consumed. 00140 *@a_out: output parameter. The converted output buffer. 00141 *Must be freed by the buffer. 00142 *@a_out_len: output parameter. The length of the output buffer. 00143 * 00144 *Converts a raw input buffer into an utf8 buffer. 00145 * 00146 *Returns CR_OK upon successfull completion, an error code otherwise. 00147 */ 00148 enum CRStatus 00149 cr_enc_handler_convert_input (CREncHandler * a_this, 00150 const guchar * a_in, 00151 gulong * a_in_len, 00152 guchar ** a_out, gulong * a_out_len) 00153 { 00154 enum CRStatus status = CR_OK; 00155 00156 g_return_val_if_fail (a_this && a_in && a_in_len && a_out, 00157 CR_BAD_PARAM_ERROR); 00158 00159 if (a_this->decode_input == NULL) 00160 return CR_OK; 00161 00162 if (a_this->enc_str_len_as_utf8) { 00163 status = a_this->enc_str_len_as_utf8 (a_in, 00164 &a_in[*a_in_len - 1], 00165 a_out_len); 00166 00167 g_return_val_if_fail (status == CR_OK, status); 00168 } else { 00169 *a_out_len = *a_in_len; 00170 } 00171 00172 *a_out = g_malloc0 (*a_out_len); 00173 00174 status = a_this->decode_input (a_in, a_in_len, *a_out, a_out_len); 00175 00176 if (status != CR_OK) { 00177 g_free (*a_out); 00178 *a_out = NULL; 00179 } 00180 00181 g_return_val_if_fail (status == CR_OK, status); 00182 00183 return CR_OK; 00184 }