Ruby 1.9.3p448 (2013-06-27 revision 41675)
encoding.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  encoding.c -
4 
5  $Author: naruse $
6  created at: Thu May 24 17:23:27 JST 2007
7 
8  Copyright (C) 2007 Yukihiro Matsumoto
9 
10 **********************************************************************/
11 
12 #include "ruby/ruby.h"
13 #include "ruby/encoding.h"
14 #include "internal.h"
15 #include "regenc.h"
16 #include <ctype.h>
17 #ifndef NO_LOCALE_CHARMAP
18 #ifdef __CYGWIN__
19 #include <windows.h>
20 #endif
21 #ifdef HAVE_LANGINFO_H
22 #include <langinfo.h>
23 #endif
24 #endif
25 #include "ruby/util.h"
26 
27 #if defined __GNUC__ && __GNUC__ >= 4
28 #pragma GCC visibility push(default)
29 int rb_enc_register(const char *name, rb_encoding *encoding);
30 void rb_enc_set_base(const char *name, const char *orig);
31 void rb_encdb_declare(const char *name);
32 int rb_encdb_replicate(const char *name, const char *orig);
33 int rb_encdb_dummy(const char *name);
34 int rb_encdb_alias(const char *alias, const char *orig);
35 #pragma GCC visibility pop
36 #endif
37 
38 static ID id_encoding;
41 
43  const char *name;
46 };
47 
48 static struct {
50  int count;
51  int size;
53 } enc_table;
54 
55 void rb_enc_init(void);
56 
/* Alias for ENCINDEX_BUILTIN_MAX. */
57 #define ENCODING_COUNT ENCINDEX_BUILTIN_MAX
/* Sentinel index value (INT_MAX); a lookup routine in this file compares an
 * index against it before raising "encoding %s is not registered". */
58 #define UNSPECIFIED_ENCODING INT_MAX
59 
/* Longest encoding name, in bytes as measured by strlen(), that
 * valid_encoding_name_p() accepts. */
60 #define ENCODING_NAMELEN_MAX 63
/* Non-zero when `name` is non-NULL and at most ENCODING_NAMELEN_MAX bytes long.
 * Note that the argument is evaluated twice. */
61 #define valid_encoding_name_p(name) ((name) && strlen(name) <= ENCODING_NAMELEN_MAX)
62 
/* Non-zero when rb_enc_mbmaxlen(enc) is zero. enc_register_at() zero-fills an
 * entry that is registered without an encoding definition, so this is how such
 * a not-yet-loaded entry is recognised. */
63 #define enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc))
64 
65 static int load_encoding(const char *name);
66 
/*
 * Memory-size callback listed in the "encoding" data-type table that follows.
 * Always reports zero bytes, whatever pointer is passed.
 */
static size_t
enc_memsize(const void *p)
{
    (void)p;    /* the wrapped pointer is never inspected */
    return 0;
}
72 
74  "encoding",
75  {0, 0, enc_memsize,},
76 };
77 
78 #define is_data_encoding(obj) (RTYPEDDATA_P(obj) && RTYPEDDATA_TYPE(obj) == &encoding_data_type)
79 
80 static VALUE
81 enc_new(rb_encoding *encoding)
82 {
83  return TypedData_Wrap_Struct(rb_cEncoding, &encoding_data_type, encoding);
84 }
85 
86 static VALUE
88 {
89  VALUE list, enc;
90 
91  if (!(list = rb_encoding_list)) {
92  rb_bug("rb_enc_from_encoding_index(%d): no rb_encoding_list", idx);
93  }
94  enc = rb_ary_entry(list, idx);
95  if (NIL_P(enc)) {
96  rb_bug("rb_enc_from_encoding_index(%d): not created yet", idx);
97  }
98  return enc;
99 }
100 
101 VALUE
103 {
104  int idx;
105  if (!encoding) return Qnil;
106  idx = ENC_TO_ENCINDEX(encoding);
107  return rb_enc_from_encoding_index(idx);
108 }
109 
110 static int enc_autoload(rb_encoding *);
111 
112 static int
114 {
115  int index = rb_enc_to_index(enc);
116  if (rb_enc_from_index(index) != enc)
117  return -1;
118  if (enc_autoload_p(enc)) {
119  index = enc_autoload(enc);
120  }
121  return index;
122 }
123 
124 static int
126 {
127  if (SPECIAL_CONST_P(obj) || !rb_typeddata_is_kind_of(obj, &encoding_data_type)) {
128  return -1;
129  }
130  return check_encoding(RDATA(obj)->data);
131 }
132 
133 static int
135 {
136  int index = enc_check_encoding(enc);
137  if (index < 0) {
138  rb_raise(rb_eTypeError, "wrong argument type %s (expected Encoding)",
139  rb_obj_classname(enc));
140  }
141  return index;
142 }
143 
144 int
146 {
147  int idx;
148 
149  idx = enc_check_encoding(enc);
150  if (idx >= 0) {
151  return idx;
152  }
153  else if (NIL_P(enc = rb_check_string_type(enc))) {
154  return -1;
155  }
156  if (!rb_enc_asciicompat(rb_enc_get(enc))) {
157  return -1;
158  }
159  return rb_enc_find_index(StringValueCStr(enc));
160 }
161 
162 static rb_encoding *
164 {
165  int idx;
166 
167  StringValue(enc);
168  if (!rb_enc_asciicompat(rb_enc_get(enc))) {
169  rb_raise(rb_eArgError, "invalid name encoding (non ASCII)");
170  }
172  if (idx < 0) {
173  rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc));
174  }
175  return rb_enc_from_index(idx);
176 }
177 
178 rb_encoding *
180 {
181  if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data;
182  return to_encoding(enc);
183 }
184 
185 void
187 {
188 }
189 
190 static int
191 enc_table_expand(int newsize)
192 {
193  struct rb_encoding_entry *ent;
194  int count = newsize;
195 
196  if (enc_table.size >= newsize) return newsize;
197  newsize = (newsize + 7) / 8 * 8;
198  ent = realloc(enc_table.list, sizeof(*enc_table.list) * newsize);
199  if (!ent) return -1;
200  memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size));
201  enc_table.list = ent;
202  enc_table.size = newsize;
203  return count;
204 }
205 
206 static int
207 enc_register_at(int index, const char *name, rb_encoding *encoding)
208 {
209  struct rb_encoding_entry *ent = &enc_table.list[index];
210  VALUE list;
211 
212  if (!valid_encoding_name_p(name)) return -1;
213  if (!ent->name) {
214  ent->name = name = strdup(name);
215  }
216  else if (STRCASECMP(name, ent->name)) {
217  return -1;
218  }
219  if (!ent->enc) {
220  ent->enc = xmalloc(sizeof(rb_encoding));
221  }
222  if (encoding) {
223  *ent->enc = *encoding;
224  }
225  else {
226  memset(ent->enc, 0, sizeof(*ent->enc));
227  }
228  encoding = ent->enc;
229  encoding->name = name;
230  encoding->ruby_encoding_index = index;
231  st_insert(enc_table.names, (st_data_t)name, (st_data_t)index);
232  list = rb_encoding_list;
233  if (list && NIL_P(rb_ary_entry(list, index))) {
234  /* initialize encoding data */
235  rb_ary_store(list, index, enc_new(encoding));
236  }
237  return index;
238 }
239 
240 static int
241 enc_register(const char *name, rb_encoding *encoding)
242 {
243  int index = enc_table.count;
244 
245  if ((index = enc_table_expand(index + 1)) < 0) return -1;
246  enc_table.count = index;
247  return enc_register_at(index - 1, name, encoding);
248 }
249 
250 static void set_encoding_const(const char *, rb_encoding *);
251 int rb_enc_registered(const char *name);
252 
253 int
254 rb_enc_register(const char *name, rb_encoding *encoding)
255 {
256  int index = rb_enc_registered(name);
257 
258  if (index >= 0) {
259  rb_encoding *oldenc = rb_enc_from_index(index);
260  if (STRCASECMP(name, rb_enc_name(oldenc))) {
261  index = enc_register(name, encoding);
262  }
263  else if (enc_autoload_p(oldenc) || !ENC_DUMMY_P(oldenc)) {
264  enc_register_at(index, name, encoding);
265  }
266  else {
267  rb_raise(rb_eArgError, "encoding %s is already registered", name);
268  }
269  }
270  else {
271  index = enc_register(name, encoding);
273  }
274  return index;
275 }
276 
277 void
278 rb_encdb_declare(const char *name)
279 {
280  int idx = rb_enc_registered(name);
281  if (idx < 0) {
282  idx = enc_register(name, 0);
283  }
285 }
286 
287 static void
289 {
290  if (rb_enc_registered(name) >= 0) {
291  rb_raise(rb_eArgError, "encoding %s is already registered", name);
292  }
293 }
294 
295 static rb_encoding*
297 {
298  rb_encoding *enc = enc_table.list[index].enc;
299 
300  enc_table.list[index].base = base;
301  if (rb_enc_dummy_p(base)) ENC_SET_DUMMY(enc);
302  return enc;
303 }
304 
305 /* for encdb.h
306  * Set base encoding for encodings which are not replicas
307  * but not in their own files.
308  */
309 void
310 rb_enc_set_base(const char *name, const char *orig)
311 {
312  int idx = rb_enc_registered(name);
313  int origidx = rb_enc_registered(orig);
314  set_base_encoding(idx, rb_enc_from_index(origidx));
315 }
316 
317 int
318 rb_enc_replicate(const char *name, rb_encoding *encoding)
319 {
320  int idx;
321 
322  enc_check_duplication(name);
323  idx = enc_register(name, encoding);
324  set_base_encoding(idx, encoding);
326  return idx;
327 }
328 
329 /*
330  * call-seq:
331  * enc.replicate(name) -> encoding
332  *
333  * Returns a replicated encoding of _enc_ whose name is _name_.
334  * The new encoding should have the same byte structure as _enc_.
335  * If _name_ is used by another encoding, raise ArgumentError.
336  *
337  */
338 static VALUE
340 {
343  rb_to_encoding(encoding)));
344 }
345 
346 static int
347 enc_replicate_with_index(const char *name, rb_encoding *origenc, int idx)
348 {
349  if (idx < 0) {
350  idx = enc_register(name, origenc);
351  }
352  else {
353  idx = enc_register_at(idx, name, origenc);
354  }
355  if (idx >= 0) {
356  set_base_encoding(idx, origenc);
358  }
359  return idx;
360 }
361 
/*
 * Register +name+ as a replica of the encoding called +orig+.
 *
 * When +orig+ is not registered yet it is first declared as a
 * placeholder (registered without a definition).  If that declaration
 * fails, -1 is returned instead of going on with a NULL base encoding,
 * which rb_enc_from_index() would yield for a negative index.
 *
 * Returns the index of the replica, or -1 on failure.
 */
int
rb_encdb_replicate(const char *name, const char *orig)
{
    int origidx = rb_enc_registered(orig);
    int idx = rb_enc_registered(name);

    if (origidx < 0) {
        origidx = enc_register(orig, 0);
        if (origidx < 0) return -1;     /* base could not be declared */
    }
    return enc_replicate_with_index(name, rb_enc_from_index(origidx), idx);
}
373 
374 int
376 {
377  int index = rb_enc_replicate(name, rb_ascii8bit_encoding());
378  rb_encoding *enc = enc_table.list[index].enc;
379 
380  ENC_SET_DUMMY(enc);
381  return index;
382 }
383 
384 int
385 rb_encdb_dummy(const char *name)
386 {
388  rb_enc_registered(name));
389  rb_encoding *enc = enc_table.list[index].enc;
390 
391  ENC_SET_DUMMY(enc);
392  return index;
393 }
394 
395 /*
396  * call-seq:
397  * enc.dummy? -> true or false
398  *
399  * Returns true for dummy encodings.
400  * A dummy encoding is an encoding for which character handling is not properly
401  * implemented.
402  * It is used for stateful encodings.
403  *
404  * Encoding::ISO_2022_JP.dummy? #=> true
405  * Encoding::UTF_8.dummy? #=> false
406  *
407  */
408 static VALUE
410 {
411  return ENC_DUMMY_P(enc_table.list[must_encoding(enc)].enc) ? Qtrue : Qfalse;
412 }
413 
414 /*
415  * call-seq:
416  * enc.ascii_compatible? -> true or false
417  *
418  * Returns whether ASCII-compatible or not.
419  *
420  * Encoding::UTF_8.ascii_compatible? #=> true
421  * Encoding::UTF_16BE.ascii_compatible? #=> false
422  *
423  */
424 static VALUE
426 {
427  return rb_enc_asciicompat(enc_table.list[must_encoding(enc)].enc) ? Qtrue : Qfalse;
428 }
429 
430 /*
431  * Returns 1 when the encoding is Unicode series other than UTF-7 else 0.
432  */
433 int
435 {
436  const char *name = rb_enc_name(enc);
437  return name[0] == 'U' && name[1] == 'T' && name[2] == 'F' && name[4] != '7';
438 }
439 
440 /*
441  * Returns copied alias name when the key is added for st_table,
442  * else returns NULL.
443  */
444 static int
445 enc_alias_internal(const char *alias, int idx)
446 {
447  return st_insert2(enc_table.names, (st_data_t)alias, (st_data_t)idx,
448  (st_data_t(*)(st_data_t))strdup);
449 }
450 
451 static int
452 enc_alias(const char *alias, int idx)
453 {
454  if (!valid_encoding_name_p(alias)) return -1;
455  if (!enc_alias_internal(alias, idx))
457  return idx;
458 }
459 
460 int
461 rb_enc_alias(const char *alias, const char *orig)
462 {
463  int idx;
464 
465  enc_check_duplication(alias);
466  if (!enc_table.list) {
467  rb_enc_init();
468  }
469  if ((idx = rb_enc_find_index(orig)) < 0) {
470  return -1;
471  }
472  return enc_alias(alias, idx);
473 }
474 
/*
 * Define +alias+ as another name for the encoding called +orig+.
 *
 * When +orig+ is not registered yet it is first declared as a
 * placeholder (registered without a definition).  If that declaration
 * fails, -1 is returned rather than recording the alias against the
 * invalid index -1.
 *
 * Returns the result of enc_alias(), or -1 on failure.
 */
int
rb_encdb_alias(const char *alias, const char *orig)
{
    int idx = rb_enc_registered(orig);

    if (idx < 0) {
        idx = enc_register(orig, 0);
        if (idx < 0) return -1;         /* target could not be declared */
    }
    return enc_alias(alias, idx);
}
485 
486 enum {
491 };
492 
495 
496 void
498 {
500  if (!enc_table.names) {
502  }
503 #define ENC_REGISTER(enc) enc_register_at(ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)
506  ENC_REGISTER(US_ASCII);
507 #undef ENC_REGISTER
509 }
510 
511 rb_encoding *
513 {
514  if (!enc_table.list) {
515  rb_enc_init();
516  }
517  if (index < 0 || enc_table.count <= index) {
518  return 0;
519  }
520  return enc_table.list[index].enc;
521 }
522 
523 int
525 {
526  st_data_t idx = 0;
527 
528  if (!name) return -1;
529  if (!enc_table.list) return -1;
530  if (st_lookup(enc_table.names, (st_data_t)name, &idx)) {
531  return (int)idx;
532  }
533  return -1;
534 }
535 
536 static VALUE
538 {
539  int safe = rb_safe_level();
540  return rb_require_safe(enclib, safe > 3 ? 3 : safe);
541 }
542 
543 static int
544 load_encoding(const char *name)
545 {
546  VALUE enclib = rb_sprintf("enc/%s.so", name);
547  VALUE verbose = ruby_verbose;
549  VALUE loaded;
550  char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib) - 3;
551  int idx;
552 
553  while (s < e) {
554  if (!ISALNUM(*s)) *s = '_';
555  else if (ISUPPER(*s)) *s = TOLOWER(*s);
556  ++s;
557  }
558  FL_UNSET(enclib, FL_TAINT|FL_UNTRUSTED);
559  OBJ_FREEZE(enclib);
561  ruby_debug = Qfalse;
562  loaded = rb_protect(require_enc, enclib, 0);
563  ruby_verbose = verbose;
564  ruby_debug = debug;
566  if (NIL_P(loaded)) return -1;
567  if ((idx = rb_enc_registered(name)) < 0) return -1;
568  if (enc_autoload_p(enc_table.list[idx].enc)) return -1;
569  return idx;
570 }
571 
572 static int
574 {
575  int i;
576  rb_encoding *base = enc_table.list[ENC_TO_ENCINDEX(enc)].base;
577 
578  if (base) {
579  i = 0;
580  do {
581  if (i >= enc_table.count) return -1;
582  } while (enc_table.list[i].enc != base && (++i, 1));
583  if (enc_autoload_p(base)) {
584  if (enc_autoload(base) < 0) return -1;
585  }
586  i = ENC_TO_ENCINDEX(enc);
587  enc_register_at(i, rb_enc_name(enc), base);
588  }
589  else {
590  i = load_encoding(rb_enc_name(enc));
591  }
592  return i;
593 }
594 
595 int
597 {
598  int i = rb_enc_registered(name);
599  rb_encoding *enc;
600 
601  if (i < 0) {
602  i = load_encoding(name);
603  }
604  else if (!(enc = rb_enc_from_index(i))) {
605  if (i != UNSPECIFIED_ENCODING) {
606  rb_raise(rb_eArgError, "encoding %s is not registered", name);
607  }
608  }
609  else if (enc_autoload_p(enc)) {
610  if (enc_autoload(enc) < 0) {
611  rb_warn("failed to load encoding (%s); use ASCII-8BIT instead",
612  name);
613  return 0;
614  }
615  }
616  return i;
617 }
618 
619 rb_encoding *
620 rb_enc_find(const char *name)
621 {
622  int idx = rb_enc_find_index(name);
623  if (idx < 0) idx = 0;
624  return rb_enc_from_index(idx);
625 }
626 
627 static inline int
629 {
630  if (SPECIAL_CONST_P(obj)) return SYMBOL_P(obj);
631  switch (BUILTIN_TYPE(obj)) {
632  case T_STRING:
633  case T_REGEXP:
634  case T_FILE:
635  return TRUE;
636  case T_DATA:
637  if (is_data_encoding(obj)) return TRUE;
638  default:
639  return FALSE;
640  }
641 }
642 
643 ID
645 {
646  CONST_ID(id_encoding, "encoding");
647  return id_encoding;
648 }
649 
650 int
652 {
653  int i = -1;
654  VALUE tmp;
655 
656  if (SPECIAL_CONST_P(obj)) {
657  if (!SYMBOL_P(obj)) return -1;
658  obj = rb_id2str(SYM2ID(obj));
659  }
660  switch (BUILTIN_TYPE(obj)) {
661  as_default:
662  default:
663  case T_STRING:
664  case T_REGEXP:
665  i = ENCODING_GET_INLINED(obj);
666  if (i == ENCODING_INLINE_MAX) {
667  VALUE iv;
668 
669  iv = rb_ivar_get(obj, rb_id_encoding());
670  i = NUM2INT(iv);
671  }
672  break;
673  case T_FILE:
674  tmp = rb_funcall(obj, rb_intern("internal_encoding"), 0, 0);
675  if (NIL_P(tmp)) obj = rb_funcall(obj, rb_intern("external_encoding"), 0, 0);
676  else obj = tmp;
677  if (NIL_P(obj)) break;
678  case T_DATA:
679  if (is_data_encoding(obj)) {
680  i = enc_check_encoding(obj);
681  }
682  else {
683  goto as_default;
684  }
685  break;
686  }
687  return i;
688 }
689 
690 static void
691 enc_set_index(VALUE obj, int idx)
692 {
693  if (idx < ENCODING_INLINE_MAX) {
694  ENCODING_SET_INLINED(obj, idx);
695  return;
696  }
698  rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx));
699 }
700 
701 void
702 rb_enc_set_index(VALUE obj, int idx)
703 {
704  rb_check_frozen(obj);
705  enc_set_index(obj, idx);
706 }
707 
708 VALUE
710 {
711 /* enc_check_capable(obj);*/
712  rb_check_frozen(obj);
713  if (rb_enc_get_index(obj) == idx)
714  return obj;
715  if (SPECIAL_CONST_P(obj)) {
716  rb_raise(rb_eArgError, "cannot set encoding");
717  }
718  if (!ENC_CODERANGE_ASCIIONLY(obj) ||
720  ENC_CODERANGE_CLEAR(obj);
721  }
722  enc_set_index(obj, idx);
723  return obj;
724 }
725 
726 VALUE
728 {
729  return rb_enc_associate_index(obj, rb_enc_to_index(enc));
730 }
731 
734 {
735  return rb_enc_from_index(rb_enc_get_index(obj));
736 }
737 
740 {
741  rb_encoding *enc = rb_enc_compatible(str1, str2);
742  if (!enc)
743  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
744  rb_enc_name(rb_enc_get(str1)),
745  rb_enc_name(rb_enc_get(str2)));
746  return enc;
747 }
748 
751 {
752  int idx1, idx2;
753  rb_encoding *enc1, *enc2;
754  int isstr1, isstr2;
755 
756  idx1 = rb_enc_get_index(str1);
757  idx2 = rb_enc_get_index(str2);
758 
759  if (idx1 < 0 || idx2 < 0)
760  return 0;
761 
762  if (idx1 == idx2) {
763  return rb_enc_from_index(idx1);
764  }
765  enc1 = rb_enc_from_index(idx1);
766  enc2 = rb_enc_from_index(idx2);
767 
768  isstr2 = RB_TYPE_P(str2, T_STRING);
769  if (isstr2 && RSTRING_LEN(str2) == 0)
770  return enc1;
771  isstr1 = RB_TYPE_P(str1, T_STRING);
772  if (isstr1 && RSTRING_LEN(str1) == 0)
773  return (rb_enc_asciicompat(enc1) && rb_enc_str_asciionly_p(str2)) ? enc1 : enc2;
774  if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2)) {
775  return 0;
776  }
777 
778  /* objects whose encoding is the same of contents */
779  if (!isstr2 && idx2 == ENCINDEX_US_ASCII)
780  return enc1;
781  if (!isstr1 && idx1 == ENCINDEX_US_ASCII)
782  return enc2;
783 
784  if (!isstr1) {
785  VALUE tmp = str1;
786  int idx0 = idx1;
787  str1 = str2;
788  str2 = tmp;
789  idx1 = idx2;
790  idx2 = idx0;
791  idx0 = isstr1;
792  isstr1 = isstr2;
793  isstr2 = idx0;
794  }
795  if (isstr1) {
796  int cr1, cr2;
797 
798  cr1 = rb_enc_str_coderange(str1);
799  if (isstr2) {
800  cr2 = rb_enc_str_coderange(str2);
801  if (cr1 != cr2) {
802  /* may need to handle ENC_CODERANGE_BROKEN */
803  if (cr1 == ENC_CODERANGE_7BIT) return enc2;
804  if (cr2 == ENC_CODERANGE_7BIT) return enc1;
805  }
806  if (cr2 == ENC_CODERANGE_7BIT) {
807  return enc1;
808  }
809  }
810  if (cr1 == ENC_CODERANGE_7BIT)
811  return enc2;
812  }
813  return 0;
814 }
815 
816 void
818 {
820 }
821 
822 
823 /*
824  * call-seq:
825  * obj.encoding -> encoding
826  *
827  * Returns the Encoding object that represents the encoding of obj.
828  */
829 
830 VALUE
832 {
833  rb_encoding *enc = rb_enc_get(obj);
834  if (!enc) {
835  rb_raise(rb_eTypeError, "unknown encoding");
836  }
837  return rb_enc_from_encoding(enc);
838 }
839 
840 int
841 rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
842 {
843  return ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
844 }
845 
846 int
847 rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
848 {
849  int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
850  if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p)
851  return MBCLEN_CHARFOUND_LEN(n);
852  else {
853  int min = rb_enc_mbminlen(enc);
854  return min <= e-p ? min : (int)(e-p);
855  }
856 }
857 
858 int
859 rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
860 {
861  int n;
862  if (e <= p)
864  n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
865  if (e-p < n)
866  return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n-(int)(e-p));
867  return n;
868 }
869 
870 int
871 rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
872 {
873  unsigned int c, l;
874  if (e <= p)
875  return -1;
876  if (rb_enc_asciicompat(enc)) {
877  c = (unsigned char)*p;
878  if (!ISASCII(c))
879  return -1;
880  if (len) *len = 1;
881  return c;
882  }
883  l = rb_enc_precise_mbclen(p, e, enc);
884  if (!MBCLEN_CHARFOUND_P(l))
885  return -1;
886  c = rb_enc_mbc_to_codepoint(p, e, enc);
887  if (!rb_enc_isascii(c, enc))
888  return -1;
889  if (len) *len = l;
890  return c;
891 }
892 
893 unsigned int
894 rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
895 {
896  int r;
897  if (e <= p)
898  rb_raise(rb_eArgError, "empty string");
899  r = rb_enc_precise_mbclen(p, e, enc);
900  if (MBCLEN_CHARFOUND_P(r)) {
901  if (len_p) *len_p = MBCLEN_CHARFOUND_LEN(r);
902  return rb_enc_mbc_to_codepoint(p, e, enc);
903  }
904  else
905  rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
906 }
907 
908 #undef rb_enc_codepoint
909 unsigned int
910 rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
911 {
912  return rb_enc_codepoint_len(p, e, 0, enc);
913 }
914 
915 int
917 {
918  int n = ONIGENC_CODE_TO_MBCLEN(enc,c);
919  if (n == 0) {
920  rb_raise(rb_eArgError, "invalid codepoint 0x%x in %s", c, rb_enc_name(enc));
921  }
922  return n;
923 }
924 
925 int
927 {
929 }
930 
931 int
933 {
935 }
936 
937 /*
938  * call-seq:
939  * enc.inspect -> string
940  *
941  * Returns a string which represents the encoding for programmers.
942  *
943  * Encoding::UTF_8.inspect #=> "#<Encoding:UTF-8>"
944  * Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
945  */
946 static VALUE
948 {
949  VALUE str = rb_sprintf("#<%s:%s%s>", rb_obj_classname(self),
951  (enc_dummy_p(self) ? " (dummy)" : ""));
953  return str;
954 }
955 
956 /*
957  * call-seq:
958  * enc.name -> string
959  *
960  * Returns the name of the encoding.
961  *
962  * Encoding::UTF_8.name #=> "UTF-8"
963  */
964 static VALUE
966 {
968 }
969 
970 static int
972 {
973  VALUE *arg = (VALUE *)args;
974 
975  if ((int)idx == (int)arg[0]) {
976  VALUE str = rb_usascii_str_new2((char *)name);
977  OBJ_FREEZE(str);
978  rb_ary_push(arg[1], str);
979  }
980  return ST_CONTINUE;
981 }
982 
983 /*
984  * call-seq:
985  * enc.names -> array
986  *
987  * Returns the list of name and aliases of the encoding.
988  *
989  * Encoding::WINDOWS_31J.names #=> ["Windows-31J", "CP932", "csWindows31J"]
990  */
991 static VALUE
993 {
994  VALUE args[2];
995 
996  args[0] = (VALUE)rb_to_encoding_index(self);
997  args[1] = rb_ary_new2(0);
998  st_foreach(enc_table.names, enc_names_i, (st_data_t)args);
999  return args[1];
1000 }
1001 
1002 /*
1003  * call-seq:
1004  * Encoding.list -> [enc1, enc2, ...]
1005  *
1006  * Returns the list of loaded encodings.
1007  *
1008  * Encoding.list
1009  * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1010  * #<Encoding:ISO-2022-JP (dummy)>]
1011  *
1012  * Encoding.find("US-ASCII")
1013  * #=> #<Encoding:US-ASCII>
1014  *
1015  * Encoding.list
1016  * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1017  * #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>]
1018  *
1019  */
1020 static VALUE
1022 {
1023  VALUE ary = rb_ary_new2(0);
1025  return ary;
1026 }
1027 
1028 /*
1029  * call-seq:
1030  * Encoding.find(string) -> enc
1031  * Encoding.find(symbol) -> enc
1032  *
1033  * Search the encoding with specified <i>name</i>.
1034  * <i>name</i> should be a string or symbol.
1035  *
1036  * Encoding.find("US-ASCII") #=> #<Encoding:US-ASCII>
1037  * Encoding.find(:Shift_JIS) #=> #<Encoding:Shift_JIS>
1038  *
1039  * Names which this method accepts are encoding names and aliases,
1040  * including the following special aliases:
1041  *
1042  * "external":: default external encoding
1043  * "internal":: default internal encoding
1044  * "locale":: locale encoding
1045  * "filesystem":: filesystem encoding
1046  *
1047  * An ArgumentError is raised when there is no encoding with <i>name</i>.
1048  * Only <code>Encoding.find("internal")</code>, however, returns nil
1049  * when there is no encoding named "internal", in other words, when Ruby
1050  * has no default internal encoding.
1051  */
1052 static VALUE
1054 {
1055  return rb_enc_from_encoding(rb_to_encoding(enc));
1056 }
1057 
1058 /*
1059  * call-seq:
1060  * Encoding.compatible?(obj1, obj2) -> enc or nil
1061  *
1062  * Checks the compatibility of two objects.
1063  *
1064  * If the objects are both strings they are compatible when they are
1065  * concatenatable. The encoding of the concatenated string will be returned
1066  * if they are compatible, nil if they are not.
1067  *
1068  * Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b")
1069  * #=> #<Encoding:ISO-8859-1>
1070  *
1071  * Encoding.compatible?(
1072  * "\xa1".force_encoding("iso-8859-1"),
1073  * "\xa1\xa1".force_encoding("euc-jp"))
1074  * #=> nil
1075  *
1076  * If the objects are non-strings their encodings are compatible when they
1077  * have an encoding and:
1078  * * Either encoding is US-ASCII compatible
1079  * * One of the encodings is a 7-bit encoding
1080  *
1081  */
1082 static VALUE
1083 enc_compatible_p(VALUE klass, VALUE str1, VALUE str2)
1084 {
1085  rb_encoding *enc;
1086 
1087  if (!enc_capable(str1)) return Qnil;
1088  if (!enc_capable(str2)) return Qnil;
1089  enc = rb_enc_compatible(str1, str2);
1090  if (!enc) return Qnil;
1091  return rb_enc_from_encoding(enc);
1092 }
1093 
1094 /* :nodoc: */
1095 static VALUE
1097 {
1098  rb_scan_args(argc, argv, "01", 0);
1099  return enc_name(self);
1100 }
1101 
1102 /* :nodoc: */
1103 static VALUE
1104 enc_load(VALUE klass, VALUE str)
1105 {
1106  return enc_find(klass, str);
1107 }
1108 
1109 rb_encoding *
1111 {
1112  if (!enc_table.list) {
1113  rb_enc_init();
1114  }
1115  return enc_table.list[ENCINDEX_ASCII].enc;
1116 }
1117 
1118 int
1120 {
1121  return ENCINDEX_ASCII;
1122 }
1123 
1124 rb_encoding *
1126 {
1127  if (!enc_table.list) {
1128  rb_enc_init();
1129  }
1130  return enc_table.list[ENCINDEX_UTF_8].enc;
1131 }
1132 
1133 int
1135 {
1136  return ENCINDEX_UTF_8;
1137 }
1138 
1139 rb_encoding *
1141 {
1142  if (!enc_table.list) {
1143  rb_enc_init();
1144  }
1145  return enc_table.list[ENCINDEX_US_ASCII].enc;
1146 }
1147 
1148 int
1150 {
1151  return ENCINDEX_US_ASCII;
1152 }
1153 
1154 int
1156 {
1158  int idx;
1159 
1160  if (NIL_P(charmap))
1161  idx = rb_usascii_encindex();
1162  else if ((idx = rb_enc_find_index(StringValueCStr(charmap))) < 0)
1163  idx = rb_ascii8bit_encindex();
1164 
1165  if (rb_enc_registered("locale") < 0) enc_alias_internal("locale", idx);
1166 
1167  return idx;
1168 }
1169 
1170 rb_encoding *
1172 {
1174 }
1175 
1176 static int
1178 {
1179  int idx;
1180 #if defined NO_LOCALE_CHARMAP
1182 #elif defined _WIN32 || defined __CYGWIN__
1183  char cp[sizeof(int) * 8 / 3 + 4];
1184  snprintf(cp, sizeof cp, "CP%d", AreFileApisANSI() ? GetACP() : GetOEMCP());
1185  idx = rb_enc_find_index(cp);
1186  if (idx < 0) idx = rb_ascii8bit_encindex();
1187 #else
1189 #endif
1190 
1191  enc_alias_internal("filesystem", idx);
1192  return idx;
1193 }
1194 
1195 int
1197 {
1198  int idx = rb_enc_registered("filesystem");
1199  if (idx < 0)
1200  idx = rb_ascii8bit_encindex();
1201  return idx;
1202 }
1203 
1204 rb_encoding *
1206 {
1208 }
1209 
1211  int index; /* -2 => not yet set, -1 => nil */
1213 };
1214 
1216 
1217 static int
1218 enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const char *name)
1219 {
1220  int overridden = FALSE;
1221 
1222  if (def->index != -2)
1223  /* Already set */
1224  overridden = TRUE;
1225 
1226  if (NIL_P(encoding)) {
1227  def->index = -1;
1228  def->enc = 0;
1229  st_insert(enc_table.names, (st_data_t)strdup(name),
1231  }
1232  else {
1233  def->index = rb_enc_to_index(rb_to_encoding(encoding));
1234  def->enc = 0;
1235  enc_alias_internal(name, def->index);
1236  }
1237 
1238  if (def == &default_external)
1240 
1241  return overridden;
1242 }
1243 
1244 rb_encoding *
1246 {
1247  if (default_external.enc) return default_external.enc;
1248 
1249  if (default_external.index >= 0) {
1250  default_external.enc = rb_enc_from_index(default_external.index);
1251  return default_external.enc;
1252  }
1253  else {
1254  return rb_locale_encoding();
1255  }
1256 }
1257 
1258 VALUE
1260 {
1262 }
1263 
1264 /*
1265  * call-seq:
1266  * Encoding.default_external -> enc
1267  *
1268  * Returns default external encoding.
1269  *
1270  * The default external encoding is used by default for strings created from
1271  * the following locations:
1272  *
1273  * * CSV
1274  * * File data read from disk
1275  * * SDBM
1276  * * StringIO
1277  * * Zlib::GzipReader
1278  * * Zlib::GzipWriter
1279  * * String#inspect
1280  * * Regexp#inspect
1281  *
1282  * While strings created from these locations will have this encoding, the
1283  * encoding may not be valid. Be sure to check String#valid_encoding?.
1284  *
1285  * File data written to disk will be transcoded to the default external
1286  * encoding when written.
1287  *
1288  * The default external encoding is initialized by the locale or -E option.
1289  */
1290 static VALUE
1292 {
1293  return rb_enc_default_external();
1294 }
1295 
1296 void
1298 {
1299  if (NIL_P(encoding)) {
1300  rb_raise(rb_eArgError, "default external can not be nil");
1301  }
1302  enc_set_default_encoding(&default_external, encoding,
1303  "external");
1304 }
1305 
1306 /*
1307  * call-seq:
1308  * Encoding.default_external = enc
1309  *
1310  * Sets default external encoding. You should not set
1311  * Encoding::default_external in ruby code as strings created before changing
1312  * the value may have a different encoding from strings created after the value
1313  * was changed. Instead you should use <tt>ruby -E</tt> to invoke ruby with
1314  * the correct default_external.
1315  *
1316  * See Encoding::default_external for information on how the default external
1317  * encoding is used.
1318  */
1319 static VALUE
1321 {
1322  rb_warning("setting Encoding.default_external");
1323  rb_enc_set_default_external(encoding);
1324  return encoding;
1325 }
1326 
1327 static struct default_encoding default_internal = {-2};
1328 
1329 rb_encoding *
1331 {
1332  if (!default_internal.enc && default_internal.index >= 0) {
1333  default_internal.enc = rb_enc_from_index(default_internal.index);
1334  }
1335  return default_internal.enc; /* can be NULL */
1336 }
1337 
1338 VALUE
1340 {
1341  /* Note: These functions cope with default_internal not being set */
1343 }
1344 
1345 /*
1346  * call-seq:
1347  * Encoding.default_internal -> enc
1348  *
1349  * Returns default internal encoding. Strings will be transcoded to the
1350  * default internal encoding in the following places if the default internal
1351  * encoding is not nil:
1352  *
1353  * * CSV
1354  * * Etc.sysconfdir and Etc.systmpdir
1355  * * File data read from disk
1356  * * File names from Dir
1357  * * Integer#chr
1358  * * String#inspect and Regexp#inspect
1359  * * Strings returned from Curses
1360  * * Strings returned from Readline
1361  * * Strings returned from SDBM
1362  * * Time#zone
1363  * * Values from ENV
1364  * * Values in ARGV including $PROGRAM_NAME
1365  * * __FILE__
1366  *
1367  * Additionally String#encode and String#encode! use the default internal
1368  * encoding if no encoding is given.
1369  *
1370  * The locale encoding (__ENCODING__), not default_internal, is used as the
1371  * encoding of created strings.
1372  *
1373  * Encoding::default_internal is initialized by the source file's
1374  * internal_encoding or -E option.
1375  */
1376 static VALUE
1378 {
1379  return rb_enc_default_internal();
1380 }
1381 
1382 void
1384 {
1385  enc_set_default_encoding(&default_internal, encoding,
1386  "internal");
1387 }
1388 
1389 /*
1390  * call-seq:
1391  * Encoding.default_internal = enc or nil
1392  *
1393  * Sets default internal encoding or removes default internal encoding when
1394  * passed nil. You should not set Encoding::default_internal in ruby code as
1395  * strings created before changing the value may have a different encoding
1396  * from strings created after the change. Instead you should use
1397  * <tt>ruby -E</tt> to invoke ruby with the correct default_internal.
1398  *
1399  * See Encoding::default_internal for information on how the default internal
1400  * encoding is used.
1401  */
1402 static VALUE
1404 {
1405  rb_warning("setting Encoding.default_internal");
1406  rb_enc_set_default_internal(encoding);
1407  return encoding;
1408 }
1409 
1410 /*
1411  * call-seq:
1412  * Encoding.locale_charmap -> string
1413  *
1414  * Returns the locale charmap name.
1415  * It returns nil if no appropriate information is available.
1416  *
1417  * Debian GNU/Linux
1418  * LANG=C
1419  * Encoding.locale_charmap #=> "ANSI_X3.4-1968"
1420  * LANG=ja_JP.EUC-JP
1421  * Encoding.locale_charmap #=> "EUC-JP"
1422  *
1423  * SunOS 5
1424  * LANG=C
1425  * Encoding.locale_charmap #=> "646"
1426  * LANG=ja
1427  * Encoding.locale_charmap #=> "eucJP"
1428  *
1429  * The result is highly platform dependent,
1430  * so Encoding.find(Encoding.locale_charmap) may raise an error.
1431  * If you need an encoding object even for an unknown locale,
1432  * use Encoding.find("locale") instead.
1433  *
1434  */
1435 VALUE
1436 rb_locale_charmap(VALUE klass)
1437 {
 /* klass is unused. Exactly one of the four branches below is compiled in:
  *  - NO_LOCALE_CHARMAP: always the fixed string "ASCII-8BIT";
  *  - Windows/Cygwin: nl_langinfo_codeset(), else a "CP<n>" name built
  *    from the console code page (or the ANSI code page when the console
  *    reports 0);
  *  - <langinfo.h> available: nl_langinfo(CODESET);
  *  - otherwise: nil.
  * (Declarator on listing line 1436 restored from the cross-reference
  * index.) */
1438 #if defined NO_LOCALE_CHARMAP
1439  return rb_usascii_str_new2("ASCII-8BIT");
1440 #elif defined _WIN32 || defined __CYGWIN__
1441  const char *nl_langinfo_codeset(void);
1442  const char *codeset = nl_langinfo_codeset();
 /* room for "CP", the decimal digits of an int-sized value, and the NUL */
1443  char cp[sizeof(int) * 3 + 4];
1444  if (!codeset) {
1445  UINT codepage = GetConsoleCP();
1446  if(!codepage) codepage = GetACP();
 /* codepage is an unsigned UINT, so format it with %u rather than %d */
1447  snprintf(cp, sizeof(cp), "CP%u", codepage);
1448  codeset = cp;
1449  }
1450  return rb_usascii_str_new2(codeset);
1451 #elif defined HAVE_LANGINFO_H
1452  char *codeset;
1453  codeset = nl_langinfo(CODESET);
1454  return rb_usascii_str_new2(codeset);
1455 #else
1456  return Qnil;
1457 #endif
1458 }
1459 
/*
 * Defines constants under rb_cEncoding that refer to the Encoding object
 * for `enc`, deriving the constant name(s) from the encoding name `name`.
 *
 *  - A name starting with a digit defines nothing.
 *  - A name that starts with an uppercase letter and consists only of
 *    alphanumerics and '_' is defined as-is (e.g. "Shift_JIS").
 *  - Otherwise a sanitized copy is made: a leading lowercase letter is
 *    upcased and every non-alphanumeric byte becomes '_' (e.g. "UTF-8"
 *    gives UTF_8, "eucJP" gives EucJP). It is defined only when the name
 *    contains an uppercase letter.
 *  - If the name contains any lowercase letter, an all-uppercase variant is
 *    defined as well (e.g. SHIFT_JIS, EUCJP).
 *
 * Names longer than ENCODING_NAMELEN_MAX are ignored. The scratch copy is
 * taken with ALLOCA_N, so it only lives for the duration of this call.
 * (Declarator on listing line 1461 restored: the cross-reference index
 * gives the prototype, and the parameter names are the ones the body uses.)
 */
1460 static void
1461 set_encoding_const(const char *name, rb_encoding *enc)
1462 {
1463  VALUE encoding = rb_enc_from_encoding(enc);
1464  char *s = (char *)name;
1465  int haslower = 0, hasupper = 0, valid = 0;
1466 
1467  if (ISDIGIT(*s)) return;
1468  if (ISUPPER(*s)) {
1469  hasupper = 1;
 /* scan the longest prefix that is usable verbatim as a constant name */
1470  while (*++s && (ISALNUM(*s) || *s == '_')) {
1471  if (ISLOWER(*s)) haslower = 1;
1472  }
1473  }
 /* reaching the terminator means the whole name is a valid constant name */
1474  if (!*s) {
1475  if (s - name > ENCODING_NAMELEN_MAX) return;
1476  valid = 1;
1477  rb_define_const(rb_cEncoding, name, encoding);
1478  }
1479  if (!valid || haslower) {
1480  size_t len = s - name;
1481  if (len > ENCODING_NAMELEN_MAX) return;
 /* keep scanning from the stop point until both letter cases have been
  * seen or the name ends; only reached while *s is not the terminator */
1482  if (!haslower || !hasupper) {
1483  do {
1484  if (ISLOWER(*s)) haslower = 1;
1485  if (ISUPPER(*s)) hasupper = 1;
1486  } while (*++s && (!haslower || !hasupper));
1487  len = s - name;
1488  }
1489  len += strlen(s);
 /* the post-increment makes len include the terminating NUL for the copy */
1490  if (len++ > ENCODING_NAMELEN_MAX) return;
1491  MEMCPY(s = ALLOCA_N(char, len), name, char, len);
1492  name = s;
1493  if (!valid) {
1494  if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
1495  for (; *s; ++s) {
1496  if (!ISALNUM(*s)) *s = '_';
1497  }
1498  if (hasupper) {
1499  rb_define_const(rb_cEncoding, name, encoding);
1500  }
1501  }
1502  if (haslower) {
1503  for (s = (char *)name; *s; ++s) {
1504  if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
1505  }
1506  rb_define_const(rb_cEncoding, name, encoding);
1507  }
1508  }
1509 }
1510 
/*
 * Iteration callback for a name table: `name` is a C string key and `arg`
 * is the Ruby array being filled. Appends a frozen US-ASCII String copy of
 * the key to that array; `idx` (the table value) is not used. Always
 * returns ST_CONTINUE so the iteration visits every entry.
 * (Declarator on listing line 1512 restored from the cross-reference index.)
 */
1511 static int
1512 rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg)
1513 {
1514  VALUE ary = (VALUE)arg;
1515  VALUE str = rb_usascii_str_new2((char *)name);
1516  OBJ_FREEZE(str);
1517  rb_ary_push(ary, str);
1518  return ST_CONTINUE;
1519 }
1520 
1521 /*
1522  * call-seq:
1523  * Encoding.name_list -> ["enc1", "enc2", ...]
1524  *
1525  * Returns the list of available encoding names.
1526  *
1527  * Encoding.name_list
1528  * #=> ["US-ASCII", "ASCII-8BIT", "UTF-8",
1529  * "ISO-8859-1", "Shift_JIS", "EUC-JP",
1530  * "Windows-31J",
1531  * "BINARY", "CP932", "eucJP"]
1532  *
1533  */
1534 
1535 static VALUE
1536 rb_enc_name_list(VALUE klass)
1537 {
 /* klass is unused. The array is pre-sized to the number of entries in the
  * name table and then filled by rb_enc_name_list_i, one frozen String
  * per registered name. */
1538  VALUE ary = rb_ary_new2(enc_table.names->num_entries);
 /* NOTE(review): listing lines 1536 and 1539 were missing from this listing.
  * The declarator comes from the cross-reference index; the st_foreach call
  * is reconstructed from the callback's signature. Confirm against upstream. */
1539  st_foreach(enc_table.names, rb_enc_name_list_i, (st_data_t)ary);
1540  return ary;
1541 }
1542 
/*
 * Iteration callback over the name table that collects aliases.
 *
 * `name` is a C string key, `orig` the encoding index stored for it, and
 * `arg` points at a two-element VALUE array: [0] is the result Hash and
 * [1] is an Array used as a per-index cache of frozen original-name
 * Strings.
 *
 * When nothing is cached for the index yet, the encoding is looked up. The
 * entry is skipped if there is no such encoding or if `name` equals the
 * encoding's own name (case-insensitively), i.e. it is not an alias.
 * Otherwise the original name is cached, and name => original name is
 * stored in the Hash with a frozen key. Always returns ST_CONTINUE.
 *
 * NOTE(review): listing lines 1544 (declarator) and 1552 (declaration of
 * `enc`) were missing from this listing. The declarator comes from the
 * cross-reference index; the `enc` initialiser is reconstructed from how
 * `enc` and `idx` are used below. Confirm against upstream.
 */
1543 static int
1544 rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg)
1545 {
1546  VALUE *p = (VALUE *)arg;
1547  VALUE aliases = p[0], ary = p[1];
1548  int idx = (int)orig;
1549  VALUE key, str = rb_ary_entry(ary, idx);
1550 
1551  if (NIL_P(str)) {
1552  rb_encoding *enc = rb_enc_from_index(idx);
1553 
1554  if (!enc) return ST_CONTINUE;
1555  if (STRCASECMP((char*)name, rb_enc_name(enc)) == 0) {
1556  return ST_CONTINUE;
1557  }
1558  str = rb_usascii_str_new2(rb_enc_name(enc));
1559  OBJ_FREEZE(str);
1560  rb_ary_store(ary, idx, str);
1561  }
1562  key = rb_usascii_str_new2((char *)name);
1563  OBJ_FREEZE(key);
1564  rb_hash_aset(aliases, key, str);
1565  return ST_CONTINUE;
1566 }
1567 
1568 /*
1569  * call-seq:
1570  * Encoding.aliases -> {"alias1" => "orig1", "alias2" => "orig2", ...}
1571  *
1572  * Returns a hash that maps each available encoding alias to its original encoding name.
1573  *
1574  * Encoding.aliases
1575  * #=> {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1968"=>"US-ASCII",
1576  * "SJIS"=>"Shift_JIS", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"}
1577  *
1578  */
1579 
1580 static VALUE
1581 rb_enc_aliases(VALUE klass)
1582 {
 /* klass is unused. aliases[0] is the Hash that gets returned; aliases[1]
  * is a scratch Array that rb_enc_aliases_enc_i uses to cache one frozen
  * original-name String per encoding index. */
1583  VALUE aliases[2];
1584  aliases[0] = rb_hash_new();
1585  aliases[1] = rb_ary_new();
 /* NOTE(review): listing lines 1581 and 1586 were missing from this listing.
  * The declarator comes from the cross-reference index; the st_foreach call
  * is reconstructed from the callback's signature. Confirm against upstream. */
1586  st_foreach(enc_table.names, rb_enc_aliases_enc_i, (st_data_t)aliases);
1587  return aliases[0];
1588 }
1589 
/*
 * Class setup for Encoding: defines the Encoding class under rb_cObject,
 * attaches its instance methods, and builds an array holding one wrapper
 * object per entry currently in enc_table.
 *
 * NOTE(review): this listing has lost several lines of the function. The
 * declarator (listing line 1591; the cross-reference index gives
 * `void Init_Encoding(void)`) and listing lines 1599-1600, 1608-1612, 1615,
 * 1617-1621 and 1625-1626 are absent. The index mentions
 * rb_undef_alloc_func, rb_undef_method, rb_define_singleton_method,
 * rb_encoding_list and rb_gc_register_mark_object, which are presumably used
 * on those lines. Confirm against the upstream file before relying on this
 * text.
 */
1590 void
1592 {
/* within this function rb_intern(str) is redirected to rb_intern_const(str) */
1593 #undef rb_intern
1594 #define rb_intern(str) rb_intern_const(str)
1595  VALUE list;
1596  int i;
1597 
1598  rb_cEncoding = rb_define_class("Encoding", rb_cObject);
 /* instance methods; "to_s" and "name" share the enc_name implementation */
1601  rb_define_method(rb_cEncoding, "to_s", enc_name, 0);
1602  rb_define_method(rb_cEncoding, "inspect", enc_inspect, 0);
1603  rb_define_method(rb_cEncoding, "name", enc_name, 0);
1604  rb_define_method(rb_cEncoding, "names", enc_names, 0);
1605  rb_define_method(rb_cEncoding, "dummy?", enc_dummy_p, 0);
1606  rb_define_method(rb_cEncoding, "ascii_compatible?", enc_ascii_compatible_p, 0);
1607  rb_define_method(rb_cEncoding, "replicate", enc_replicate, 1);
1613 
 /* argc of -1: enc_dump receives its arguments as (argc, argv, self) */
1614  rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
1616 
1622 
 /* array pre-sized to the current number of table entries */
1623  list = rb_ary_new2(enc_table.count);
 /* clears the array's class pointer -- presumably to keep this internal
  * array hidden from Ruby-level code; TODO confirm */
1624  RBASIC(list)->klass = 0;
1627 
 /* wrap every encoding currently in the table, in index order */
1628  for (i = 0; i < enc_table.count; ++i) {
1629  rb_ary_push(list, enc_new(enc_table.list[i].enc));
1630  }
1631 }
1632 
1633 /* locale insensitive ctype functions */
1634 
/*
 * ctype_test(c, ctype) yields 1 when c passes rb_isascii() AND the ASCII
 * classification macro reports c as a member of class `ctype`, else 0.
 * The rb_isascii() guard is evaluated first, so a value that fails it is
 * never passed to the classification macro. `c` appears more than once in
 * the expansion, so do not pass an expression with side effects.
 */
1635 #define ctype_test(c, ctype) \
1636  (rb_isascii(c) && ONIGENC_IS_ASCII_CODE_CTYPE((c), (ctype)))
1637 
/*
 * One predicate per character class. Each returns the result of
 * ctype_test() for the corresponding ONIGENC_CTYPE_* constant, so any
 * value that fails rb_isascii() is reported as not belonging to the class.
 */
1638 int rb_isalnum(int c) { return ctype_test(c, ONIGENC_CTYPE_ALNUM); }
1639 int rb_isalpha(int c) { return ctype_test(c, ONIGENC_CTYPE_ALPHA); }
1640 int rb_isblank(int c) { return ctype_test(c, ONIGENC_CTYPE_BLANK); }
1641 int rb_iscntrl(int c) { return ctype_test(c, ONIGENC_CTYPE_CNTRL); }
1642 int rb_isdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_DIGIT); }
1643 int rb_isgraph(int c) { return ctype_test(c, ONIGENC_CTYPE_GRAPH); }
1644 int rb_islower(int c) { return ctype_test(c, ONIGENC_CTYPE_LOWER); }
1645 int rb_isprint(int c) { return ctype_test(c, ONIGENC_CTYPE_PRINT); }
1646 int rb_ispunct(int c) { return ctype_test(c, ONIGENC_CTYPE_PUNCT); }
1647 int rb_isspace(int c) { return ctype_test(c, ONIGENC_CTYPE_SPACE); }
1648 int rb_isupper(int c) { return ctype_test(c, ONIGENC_CTYPE_UPPER); }
1649 int rb_isxdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_XDIGIT); }
1650 
/*
 * Locale-insensitive lowercase conversion. A value that passes rb_isascii()
 * is mapped through ONIGENC_ASCII_CODE_TO_LOWER_CASE; any other value is
 * returned unchanged.
 * (Declarator on listing line 1652 restored from the cross-reference index.)
 */
1651 int
1652 rb_tolower(int c)
1653 {
1654  return rb_isascii(c) ? ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) : c;
1655 }
1656 
/*
 * Locale-insensitive uppercase conversion. A value that passes rb_isascii()
 * is mapped through ONIGENC_ASCII_CODE_TO_UPPER_CASE; any other value is
 * returned unchanged.
 * (Declarator on listing line 1658 restored from the cross-reference index.)
 */
1657 int
1658 rb_toupper(int c)
1659 {
1660  return rb_isascii(c) ? ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) : c;
1661 }
1662 
static void enc_set_index(VALUE obj, int idx)
Definition: encoding.c:691
static int rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg)
Definition: encoding.c:1512
rb_encoding OnigEncodingUS_ASCII
#define RSTRING_LEN(string)
Definition: generator.h:45
#define ONIGENC_CTYPE_BLANK
Definition: oniguruma.h:193
#define ONIGENC_CTYPE_PUNCT
Definition: oniguruma.h:199
int rb_enc_codelen(int c, rb_encoding *enc)
Definition: encoding.c:916
int rb_enc_get_index(VALUE obj)
Definition: encoding.c:651
#define MBCLEN_CHARFOUND_P(ret)
Definition: encoding.h:135
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Definition: encoding.c:739
VALUE rb_ary_entry(VALUE ary, long offset)
Definition: array.c:956
#define MBCLEN_CHARFOUND_LEN(ret)
Definition: encoding.h:136
void rb_bug(const char *fmt,...)
Definition: error.c:265
#define rb_enc_mbc_to_codepoint(p, e, enc)
Definition: encoding.h:152
void rb_enc_copy(VALUE obj1, VALUE obj2)
Definition: encoding.c:817
#define FALSE
Definition: nkf.h:185
void rb_enc_set_base(const char *name, const char *orig)
Definition: encoding.c:310
size_t strlen(const char *)
int i
Definition: win32ole.c:776
Definition: st.h:77
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Definition: encoding.h:73
VALUE rb_id2str(ID id)
Definition: ripper.c:15432
VALUE rb_cEncoding
Definition: encoding.c:39
static VALUE enc_load(VALUE klass, VALUE str)
Definition: encoding.c:1104
#define NUM2INT(x)
Definition: ruby.h:536
int count
Definition: encoding.c:50
void rb_undef_alloc_func(VALUE)
Definition: vm_method.c:345
void rb_define_singleton_method(VALUE obj, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a singleton method for obj.
Definition: class.c:1342
static int rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg)
Definition: encoding.c:1544
static VALUE enc_inspect(VALUE self)
Definition: encoding.c:947
static rb_encoding * set_base_encoding(int index, rb_encoding *base)
Definition: encoding.c:296
#define FL_TAINT
Definition: ruby.h:925
#define CLASS_OF(v)
Definition: ruby.h:376
static VALUE rb_enc_name_list(VALUE klass)
Definition: encoding.c:1536
static VALUE enc_list(VALUE klass)
Definition: encoding.c:1021
static int enc_register_at(int index, const char *name, rb_encoding *encoding)
Definition: encoding.c:207
#define st_foreach
Definition: regint.h:150
int rb_toupper(int c)
Definition: encoding.c:1658
#define Qtrue
Definition: ruby.h:366
void Init_Encoding(void)
Definition: encoding.c:1591
#define TypedData_Wrap_Struct(klass, data_type, sval)
Definition: ruby.h:826
rb_encoding * rb_to_encoding(VALUE enc)
Definition: encoding.c:179
#define ENC_CODERANGE_CLEAR(obj)
Definition: encoding.h:65
VALUE rb_enc_from_encoding(rb_encoding *encoding)
Definition: encoding.c:102
int rb_enc_tolower(int c, rb_encoding *enc)
Definition: encoding.c:932
const char * nl_langinfo_codeset(void)
Definition: langinfo.c:64
VALUE rb_eTypeError
Definition: error.c:467
static rb_encoding * to_encoding(VALUE enc)
Definition: encoding.c:163
st_table * names
Definition: encoding.c:52
rb_encoding * rb_default_internal_encoding(void)
Definition: encoding.c:1330
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:740
int rb_usascii_encindex(void)
Definition: encoding.c:1149
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Definition: encoding.c:750
static VALUE enc_names(VALUE self)
Definition: encoding.c:992
static VALUE INT2NUM(int v)
Definition: ruby.h:981
static struct @5 enc_table
VALUE rb_funcall(VALUE, ID, int,...)
Calls a method.
Definition: vm_eval.c:638
int rb_isblank(int c)
Definition: encoding.c:1640
static int enc_table_expand(int newsize)
Definition: encoding.c:191
void * realloc()
VALUE rb_protect(VALUE(*proc)(VALUE), VALUE data, int *state)
Definition: eval.c:704
#define RSTRING_PTR(string)
Definition: generator.h:42
#define ENCODING_GET_INLINED(obj)
Definition: encoding.h:46
int rb_enc_str_coderange(VALUE)
Definition: string.c:324
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
Definition: encoding.c:894
static VALUE enc_new(rb_encoding *encoding)
Definition: encoding.c:81
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:1574
VALUE rb_ivar_get(VALUE, ID)
Definition: variable.c:1026
#define ONIGENC_CTYPE_CNTRL
Definition: oniguruma.h:194
void rb_enc_set_default_external(VALUE encoding)
Definition: encoding.c:1297
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Definition: encoding.c:727
int rb_isupper(int c)
Definition: encoding.c:1648
static VALUE rb_enc_aliases(VALUE klass)
Definition: encoding.c:1581
static VALUE set_default_external(VALUE klass, VALUE encoding)
Definition: encoding.c:1320
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:847
int rb_isprint(int c)
Definition: encoding.c:1645
#define DATA_PTR(dta)
Definition: ruby.h:795
VALUE rb_locale_charmap(VALUE klass)
Definition: encoding.c:1436
const char * alias
Definition: nkf.c:1127
#define ENC_SET_DUMMY(enc)
Definition: encoding.h:228
#define ENC_REGISTER(enc)
#define st_lookup
Definition: regint.h:149
int rb_enc_registered(const char *name)
Definition: encoding.c:524
ID rb_id_encoding(void)
Definition: encoding.c:644
int rb_isdigit(int c)
Definition: encoding.c:1642
static int enc_alias_internal(const char *alias, int idx)
Definition: encoding.c:445
#define ISDIGIT(c)
#define ONIGENC_CTYPE_ALNUM
Definition: oniguruma.h:204
#define ONIGENC_IS_ASCII_CODE(code)
int rb_filesystem_encindex(void)
Definition: encoding.c:1196
void rb_enc_init(void)
Definition: encoding.c:497
rb_encoding * rb_utf8_encoding(void)
Definition: encoding.c:1125
#define RDATA(obj)
Definition: ruby.h:913
void rb_undef_method(VALUE klass, const char *name)
Definition: class.c:1227
static int min(int a, int b)
#define ONIGENC_CTYPE_ALPHA
Definition: oniguruma.h:192
#define ENC_CODERANGE_7BIT
Definition: encoding.h:58
const char * rb_obj_classname(VALUE)
Definition: variable.c:318
static VALUE enc_dummy_p(VALUE enc)
Definition: encoding.c:409
static VALUE rb_enc_from_encoding_index(int idx)
Definition: encoding.c:87
int rb_enc_toupper(int c, rb_encoding *enc)
Definition: encoding.c:926
Win32OLEIDispatch * p
Definition: win32ole.c:778
#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c)
Definition: nkf.c:87
#define ONIGENC_CTYPE_UPPER
Definition: oniguruma.h:201
int st_insert2(st_table *, st_data_t, st_data_t, st_data_t(*)(st_data_t))
int args
Definition: win32ole.c:777
st_table * st_init_strcasetable(void)
Definition: st.c:229
#define ctype_test(c, ctype)
Definition: encoding.c:1635
#define RB_TYPE_P(obj, type)
Definition: ruby.h:1353
#define FL_UNTRUSTED
Definition: ruby.h:926
int rb_to_encoding_index(VALUE enc)
Definition: encoding.c:145
rb_encoding * rb_default_external_encoding(void)
Definition: encoding.c:1245
#define ONIGENC_CTYPE_DIGIT
Definition: oniguruma.h:195
#define ONIGENC_MBC_ENC_LEN(enc, p, e)
Definition: oniguruma.h:259
VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val)
Definition: hash.c:1123
int rb_isxdigit(int c)
Definition: encoding.c:1649
int rb_ispunct(int c)
Definition: encoding.c:1646
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:841
RUBY_EXTERN VALUE rb_cObject
Definition: ruby.h:1246
Definition: encoding.c:42
#define ONIGENC_CTYPE_XDIGIT
Definition: oniguruma.h:202
#define SYM2ID(v)
Definition: cparse.c:66
#define rb_enc_isascii(c, enc)
Definition: encoding.h:171
#define RSTRING_END(str)
Definition: ruby.h:680
int rb_isspace(int c)
Definition: encoding.c:1647
int rb_typeddata_is_kind_of(VALUE obj, const rb_data_type_t *data_type)
Definition: error.c:430
VALUE rb_ary_replace(VALUE copy, VALUE orig)
Definition: array.c:2818
VALUE rb_ary_new(void)
Definition: array.c:339
int rb_ascii8bit_encindex(void)
Definition: encoding.c:1119
VALUE rb_enc_default_external(void)
Definition: encoding.c:1259
#define snprintf
Definition: subst.h:6
#define NIL_P(v)
Definition: ruby.h:374
#define ISASCII(c)
Definition: ruby.h:1450
static VALUE enc_name(VALUE self)
Definition: encoding.c:965
#define ENC_CODERANGE_ASCIIONLY(obj)
Definition: encoding.h:62
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
Definition: class.c:468
void rb_enc_set_index(VALUE obj, int idx)
Definition: encoding.c:702
int rb_enc_replicate(const char *name, rb_encoding *encoding)
Definition: encoding.c:318
void rb_define_const(VALUE, const char *, VALUE)
Definition: variable.c:1923
void rb_ary_store(VALUE ary, long idx, VALUE val)
Definition: array.c:635
#define ENCODING_COUNT
Definition: encoding.c:57
#define ISALNUM(c)
Definition: ruby.h:1456
static void set_encoding_const(const char *, rb_encoding *)
Definition: encoding.c:1461
static VALUE enc_dump(int argc, VALUE *argv, VALUE self)
Definition: encoding.c:1096
int rb_encdb_alias(const char *alias, const char *orig)
Definition: encoding.c:476
int argc
Definition: ruby.c:120
#define Qfalse
Definition: ruby.h:365
VALUE rb_require_safe(VALUE, int)
Definition: load.c:591
int rb_locale_encindex(void)
Definition: encoding.c:1155
#define ALLOCA_N(type, n)
Definition: ruby.h:1038
void rb_gc_register_mark_object(VALUE obj)
Definition: gc.c:969
#define ISUPPER(c)
Definition: ruby.h:1454
#define MEMCPY(p1, p2, type, n)
Definition: ruby.h:1053
VALUE rb_enc_associate_index(VALUE obj, int idx)
Definition: encoding.c:709
VALUE rb_eEncCompatError
Definition: error.c:474
arg
Definition: ripper.y:1283
#define OBJ_FREEZE(x)
Definition: ruby.h:970
#define rb_enc_mbminlen(enc)
Definition: encoding.h:124
#define ISLOWER(c)
Definition: ruby.h:1455
const char * name
Definition: encoding.c:43
#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n)
Definition: oniguruma.h:250
int rb_isgraph(int c)
Definition: encoding.c:1643
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
Definition: oniguruma.h:265
static int enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const char *name)
Definition: encoding.c:1218
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Definition: encoding.c:871
int rb_encdb_dummy(const char *name)
Definition: encoding.c:385
static int enc_register(const char *name, rb_encoding *encoding)
Definition: encoding.c:241
#define TRUE
Definition: nkf.h:186
#define T_DATA
Definition: ruby.h:428
static int enc_check_encoding(VALUE obj)
Definition: encoding.c:125
VALUE rb_sprintf(const char *format,...)
Definition: sprintf.c:1203
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:859
int rb_enc_unicode_p(rb_encoding *enc)
Definition: encoding.c:434
#define rb_enc_name(enc)
Definition: encoding.h:121
VALUE rb_hash_new(void)
Definition: hash.c:229
#define strdup(s)
Definition: util.h:69
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Definition: class.c:1415
VALUE rb_ivar_set(VALUE, ID, VALUE)
Definition: variable.c:1038
#define ONIGENC_CTYPE_PRINT
Definition: oniguruma.h:198
unsigned long ID
Definition: ruby.h:89
rb_encoding * rb_usascii_encoding(void)
Definition: encoding.c:1140
int rb_encdb_replicate(const char *name, const char *orig)
Definition: encoding.c:363
#define ENCODING_NAMELEN_MAX
Definition: encoding.c:60
#define Qnil
Definition: ruby.h:367
int rb_define_dummy_encoding(const char *name)
Definition: encoding.c:375
static struct default_encoding default_internal
Definition: encoding.c:1327
const char * name
Definition: oniguruma.h:158
#define BUILTIN_TYPE(x)
Definition: ruby.h:438
#define debug(x)
Definition: _sdbm.c:56
unsigned long VALUE
Definition: ruby.h:88
static VALUE enc_compatible_p(VALUE klass, VALUE str1, VALUE str2)
Definition: encoding.c:1083
rb_encoding * rb_locale_encoding(void)
Definition: encoding.c:1171
#define RBASIC(obj)
Definition: ruby.h:904
int rb_utf8_encindex(void)
Definition: encoding.c:1134
#define ENCODING_SET_INLINED(obj, i)
Definition: encoding.h:33
VALUE rb_obj_encoding(VALUE obj)
Definition: encoding.c:831
register unsigned int len
Definition: name2ctype.h:22210
static int rb_enc_dummy_p(rb_encoding *enc)
Definition: encoding.h:231
static int enc_autoload(rb_encoding *)
Definition: encoding.c:573
#define rb_enc_asciicompat(enc)
Definition: encoding.h:181
#define ONIGENC_CTYPE_SPACE
Definition: oniguruma.h:200
int rb_islower(int c)
Definition: encoding.c:1644
static VALUE set_default_internal(VALUE klass, VALUE encoding)
Definition: encoding.c:1403
long st_data_t
Definition: syck.h:69
#define enc_autoload_p(enc)
Definition: encoding.c:63
#define FL_UNSET(x, f)
Definition: ruby.h:960
#define UChar
Definition: oniguruma.h:107
static void enc_check_duplication(const char *name)
Definition: encoding.c:288
#define StringValueCStr(v)
Definition: ruby.h:468
static size_t enc_memsize(const void *p)
Definition: encoding.c:68
#define SYMBOL_P(v)
Definition: cparse.c:69
static ID id_encoding
Definition: encoding.c:38
VALUE rb_usascii_str_new2(const char *)
static int enc_names_i(st_data_t name, st_data_t idx, st_data_t args)
Definition: encoding.c:971
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:733
static VALUE get_default_external(VALUE klass)
Definition: encoding.c:1291
int size
Definition: encoding.c:51
static struct default_encoding default_external
Definition: encoding.c:1215
#define ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e)
Definition: oniguruma.h:254
#define ENCODING_INLINE_MAX
Definition: encoding.h:29
#define xmalloc
Definition: defines.h:64
void rb_set_errinfo(VALUE err)
Definition: eval.c:1065
static VALUE enc_replicate(VALUE encoding, VALUE name)
Definition: encoding.c:339
rb_encoding * enc
Definition: encoding.c:1212
static int enc_set_filesystem_encoding(void)
Definition: encoding.c:1177
VALUE rb_check_string_type(VALUE)
Definition: string.c:1449
void rb_enc_set_default_internal(VALUE encoding)
Definition: encoding.c:1383
static VALUE enc_ascii_compatible_p(VALUE enc)
Definition: encoding.c:425
uint8_t key[16]
Definition: random.c:1284
#define valid_encoding_name_p(name)
Definition: encoding.c:61
#define T_STRING
Definition: ruby.h:418
int rb_enc_alias(const char *alias, const char *orig)
Definition: encoding.c:461
static VALUE require_enc(VALUE enclib)
Definition: encoding.c:537
#define is_data_encoding(obj)
Definition: encoding.c:78
struct rb_encoding_entry * list
Definition: encoding.c:49
rb_encoding * rb_filesystem_encoding(void)
Definition: encoding.c:1205
static int enc_capable(VALUE obj)
Definition: encoding.c:628
#define T_FILE
Definition: ruby.h:424
static const rb_data_type_t encoding_data_type
Definition: encoding.c:73
#define rb_isascii(c)
Definition: ruby.h:1433
#define TOLOWER(c)
Definition: ruby.h:1462
int rb_isalnum(int c)
Definition: encoding.c:1638
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c)
#define st_insert
Definition: regint.h:148
VALUE rb_enc_default_internal(void)
Definition: encoding.c:1339
VALUE rb_ary_new2(long capa)
Definition: array.c:332
static int check_encoding(rb_encoding *enc)
Definition: encoding.c:113
static VALUE get_default_internal(VALUE klass)
Definition: encoding.c:1377
#define rb_safe_level()
Definition: tcltklib.c:90
int rb_tolower(int c)
Definition: encoding.c:1652
#define ruby_debug
Definition: ruby.h:1170
#define ENC_TO_ENCINDEX(enc)
Definition: encoding.h:225
const char * name
Definition: nkf.c:208
static int enc_replicate_with_index(const char *name, rb_encoding *origenc, int idx)
Definition: encoding.c:347
unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:910
#define STRCASECMP(s1, s2)
Definition: ruby.h:1466
#define rb_intern(str)
#define rb_enc_to_index(enc)
Definition: encoding.h:86
static int enc_alias(const char *alias, int idx)
Definition: encoding.c:452
rb_encoding * rb_ascii8bit_encoding(void)
Definition: encoding.c:1110
void rb_warning(const char *fmt,...)
Definition: error.c:212
int rb_enc_find_index(const char *name)
Definition: encoding.c:596
int rb_iscntrl(int c)
Definition: encoding.c:1641
#define rb_check_frozen(obj)
Definition: intern.h:242
#define CONST_ID(var, str)
Definition: ruby.h:1127
int rb_enc_register(const char *name, rb_encoding *encoding)
Definition: encoding.c:254
#define SPECIAL_CONST_P(x)
Definition: ruby.h:953
Definition: nkf.c:108
static VALUE rb_encoding_list
Definition: encoding.c:40
#define ONIGENC_CTYPE_GRAPH
Definition: oniguruma.h:196
void rb_encdb_declare(const char *name)
Definition: encoding.c:278
int rb_enc_str_asciionly_p(VALUE)
Definition: string.c:337
void rb_gc_mark_encodings(void)
Definition: encoding.c:186
#define UNSPECIFIED_ENCODING
Definition: encoding.c:58
rb_encoding OnigEncodingUTF_8
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Definition: class.c:1209
#define ruby_verbose
Definition: ruby.h:1169
#define ONIGENC_CTYPE_LOWER
Definition: oniguruma.h:197
void rb_warn(const char *fmt,...)
Definition: error.c:196
rb_encoding * enc
Definition: encoding.c:44
VALUE rb_eArgError
Definition: error.c:468
static int load_encoding(const char *name)
Definition: encoding.c:544
rb_encoding * rb_enc_find(const char *name)
Definition: encoding.c:620
#define T_REGEXP
Definition: ruby.h:419
char ** argv
Definition: ruby.c:121
#define StringValue(v)
Definition: ruby.h:466
static VALUE enc_find(VALUE klass, VALUE enc)
Definition: encoding.c:1053
#define ENC_DUMMY_P(enc)
Definition: encoding.h:227
int rb_isalpha(int c)
Definition: encoding.c:1639
static int must_encoding(VALUE enc)
Definition: encoding.c:134
rb_encoding * rb_enc_from_index(int index)
Definition: encoding.c:512
rb_encoding * base
Definition: encoding.c:45