Ruby 3.3.5p100 (2024-09-03 revision ef084cc8f4958c1b6e4ead99136631bef6d8ddba)
encoding.h
Go to the documentation of this file.
1#ifndef RUBY_INTERNAL_ENCODING_ENCODING_H /*-*-C++-*-vi:se ft=cpp:*/
2#define RUBY_INTERNAL_ENCODING_ENCODING_H
24#include "ruby/oniguruma.h"
31#include "ruby/internal/value.h"
34
36
37
43
50enum ruby_encoding_consts {
51
53 RUBY_ENCODING_INLINE_MAX = 127,
54
56 RUBY_ENCODING_SHIFT = (RUBY_FL_USHIFT+10),
57
59 RUBY_ENCODING_MASK = (RUBY_ENCODING_INLINE_MAX<<RUBY_ENCODING_SHIFT
60 /* RUBY_FL_USER10..RUBY_FL_USER16 */),
61
63 RUBY_ENCODING_MAXNAMELEN = 42
64};
65
/* Unprefixed spellings of the RUBY_ENCODING_* enumerators; each macro expands
 * to the enumerator of the same name with the RUBY_ prefix. */
66#define ENCODING_INLINE_MAX RUBY_ENCODING_INLINE_MAX
67#define ENCODING_SHIFT RUBY_ENCODING_SHIFT
68#define ENCODING_MASK RUBY_ENCODING_MASK
/*
 * Body of RB_ENCODING_SET_INLINED(VALUE obj, int encindex); this page's
 * symbol index places that definition at encoding.h:80.
 * NOTE(review): the listing omits source line 80 (the declarator), so the
 * name and parameters above come from the index entry, not from these lines.
 *
 * Writes `encindex` into the inline encoding bits of `obj`'s flags: the
 * index is widened to VALUE, shifted to RUBY_ENCODING_SHIFT, the old bits
 * under RUBY_ENCODING_MASK are cleared, and the new bits are set.
 *
 * NOTE(review): `encindex` is not range-checked here; a value above
 * RUBY_ENCODING_INLINE_MAX would set bits outside RUBY_ENCODING_MASK.
 */
79static inline void
81{
82 VALUE f = /* upcast */ encindex;
83
84 f <<= RUBY_ENCODING_SHIFT; /* move the index to its position in the flags */
85 RB_FL_UNSET_RAW(obj, RUBY_ENCODING_MASK); /* drop the previous index first */
86 RB_FL_SET_RAW(obj, f);
87}
88
/*
 * Body of RB_ENCODING_GET_INLINED(VALUE obj); this page's symbol index
 * places that definition at encoding.h:98.
 * NOTE(review): the listing omits source line 98 (the declarator), so the
 * name and parameter above come from the index entry, not from these lines.
 *
 * Reads the flag bits of `obj` selected by RUBY_ENCODING_MASK, shifts them
 * down by RUBY_ENCODING_SHIFT and returns the result converted to int.
 */
97static inline int
99{
100 VALUE ret = RB_FL_TEST_RAW(obj, RUBY_ENCODING_MASK) >> RUBY_ENCODING_SHIFT;
101
102 return RBIMPL_CAST((int)ret);
103}
104
/* Unprefixed spellings of the RB_ENCODING_* helpers and of
 * RUBY_ENCODING_MAXNAMELEN; each macro forwards its arguments unchanged. */
105#define ENCODING_SET_INLINED(obj,i) RB_ENCODING_SET_INLINED(obj,i)
106#define ENCODING_SET(obj,i) RB_ENCODING_SET(obj,i)
107#define ENCODING_GET_INLINED(obj) RB_ENCODING_GET_INLINED(obj)
108#define ENCODING_GET(obj) RB_ENCODING_GET(obj)
109#define ENCODING_IS_ASCII8BIT(obj) RB_ENCODING_IS_ASCII8BIT(obj)
110#define ENCODING_MAXNAMELEN RUBY_ENCODING_MAXNAMELEN
117
/* NOTE(review): no description of rb_char_to_option_kcode() is visible in
 * this listing; `option` and `kcode` are presumably output parameters --
 * confirm against the implementation. */
139int rb_char_to_option_kcode(int c, int *option, int *kcode);
140
/* Creates a new "dummy" encoding with the given name. */
152int rb_define_dummy_encoding(const char *name);
153
163
175
/* Queries the index of the encoding of the passed object, if any. */
183int rb_enc_get_index(VALUE obj);
184
/*
 * Body of RB_ENCODING_GET(VALUE obj); this page's symbol index places that
 * definition at encoding.h:194.
 * NOTE(review): the listing omits source line 194 (the declarator), so the
 * name and parameter above come from the index entry, not from these lines.
 *
 * Returns the encoding index of `obj`.  The inline value is read first; only
 * when it equals RUBY_ENCODING_INLINE_MAX is rb_enc_get_index() called to
 * obtain the index.
 */
193static inline int
195{
196 int encindex = RB_ENCODING_GET_INLINED(obj);
197
198 if (encindex == RUBY_ENCODING_INLINE_MAX) {
199 return rb_enc_get_index(obj);
200 }
201 else {
202 return encindex;
203 }
204}
205
/* Destructively assigns an encoding (via its index) to an object. */
216void rb_enc_set_index(VALUE obj, int encindex);
217
219static inline void
220RB_ENCODING_SET(VALUE obj, int encindex)
221{
222 rb_enc_set_index(obj, encindex);
223}
224
/*
 * Body of RB_ENCODING_CODERANGE_SET(VALUE obj, int encindex,
 * enum ruby_coderange_type cr); this page's symbol index places that
 * definition at encoding.h:237.
 * NOTE(review): the listing omits source line 237 (the declarator), so the
 * name and parameters above come from the index entry, not from these lines.
 *
 * Applies RB_ENCODING_SET(obj, encindex) and then
 * RB_ENC_CODERANGE_SET(obj, cr), in that order.
 */
236static inline void
238{
239 RB_ENCODING_SET(obj, encindex);
240 RB_ENC_CODERANGE_SET(obj, cr);
241}
242
/* Queries if the passed object can have its encoding. */
251int rb_enc_capable(VALUE obj);
252
/* NOTE(review): no description is visible in this listing; presumably looks
 * up an encoding index by name -- confirm against the implementation. */
261int rb_enc_find_index(const char *name);
262
/* Registers an "alias" name (`alias`) for the encoding named `orig`. */
276int rb_enc_alias(const char *alias, const char *orig);
277
287
298
308
/* NOTE(review): no description is visible in this listing; presumably
 * returns the encoding of `obj` -- confirm against the implementation. */
316rb_encoding *rb_enc_get(VALUE obj);
317
/* Counterpart of rb_enc_check() that returns NULL instead of raising an
 * exception (per the rb_enc_check() description on this page). */
330rb_encoding *rb_enc_compatible(VALUE str1, VALUE str2);
331
343
/* NOTE(review): no description is visible in this listing; presumably
 * associates the encoding at `encindex` with `obj` -- confirm. */
358VALUE rb_enc_associate_index(VALUE obj, int encindex);
359
/* NOTE(review): presumably identical to rb_enc_associate_index() except that
 * it takes the encoding itself instead of its index -- confirm. */
371VALUE rb_enc_associate(VALUE obj, rb_encoding *enc);
372
/* Destructively copies the encoding of `src` to `dst`. */
386void rb_enc_copy(VALUE dst, VALUE src);
387
388
/* NOTE(review): no description is visible in this listing; presumably maps
 * an encoding index back to its rb_encoding -- confirm. */
397rb_encoding *rb_enc_from_index(int idx);
398
/* Identical to rb_find_encoding(), except it takes a C string instead of a
 * Ruby object. */
407rb_encoding *rb_enc_find(const char *name);
408
415static inline const char *
416rb_enc_name(rb_encoding *enc)
417{
418 return enc->name;
419}
420
/*
 * Body of rb_enc_mbminlen(rb_encoding *enc); this page's symbol index places
 * that definition at encoding.h:431.
 * NOTE(review): the listing omits source line 431 (the declarator), so the
 * name and parameter above come from the index entry, not from these lines.
 *
 * Returns `enc->min_enc_len`, described by the index as the minimum number
 * of bytes the encoding needs to represent a character.
 */
430static inline int
432{
433 return enc->min_enc_len;
434}
435
/*
 * Body of rb_enc_mbmaxlen(rb_encoding *enc); this page's symbol index places
 * that definition at encoding.h:446.
 * NOTE(review): the listing omits source line 446 (the declarator), so the
 * name and parameter above come from the index entry, not from these lines.
 *
 * Returns `enc->max_enc_len`, described by the index as the maximum number
 * of bytes the encoding needs to represent a character.
 */
445static inline int
447{
448 return enc->max_enc_len;
449}
450
/* Queries the number of bytes of the character at `p`. */
467int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc);
468
/* Identical to rb_enc_mbclen() unless the character at `p` overruns `e`. */
485int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc);
486
/* NOTE(review): no description is visible in this listing; the result is
 * presumably meant to be decoded with the MBCLEN_*_P / MBCLEN_*_LEN macros
 * of this header -- confirm. */
513int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc);
514
/* Short spellings of the ONIGENC_MBCLEN_* macros; each forwards `ret`
 * unchanged to the macro of the same name with the ONIGENC_ prefix. */
515#define MBCLEN_CHARFOUND_P(ret) ONIGENC_MBCLEN_CHARFOUND_P(ret)
516#define MBCLEN_CHARFOUND_LEN(ret) ONIGENC_MBCLEN_CHARFOUND_LEN(ret)
517#define MBCLEN_INVALID_P(ret) ONIGENC_MBCLEN_INVALID_P(ret)
518#define MBCLEN_NEEDMORE_P(ret) ONIGENC_MBCLEN_NEEDMORE_P(ret)
519#define MBCLEN_NEEDMORE_LEN(ret) ONIGENC_MBCLEN_NEEDMORE_LEN(ret)
/* Queries the code point of the character pointed to by `p`.
 * NOTE(review): how this differs from rb_enc_codepoint_len() is not visible
 * in this listing -- confirm against the implementation. */
535int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc);
536
/* Queries the code point of the character pointed to by `p`.  `len` may be
 * a null pointer: rb_enc_codepoint() in this header passes 0 for it. */
549unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len, rb_encoding *enc);
550
569static inline unsigned int
570rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
571{
572 return rb_enc_codepoint_len(p, e, 0, enc);
573 /* ^^^
574 * This can be `NULL` in C, `nullptr` in C++, and `0` for both.
575 * We choose the most portable one here.
576 */
577}
578
579
589static inline OnigCodePoint
590rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc)
591{
592 const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
593 const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
594
595 return ONIGENC_MBC_TO_CODE(enc, up, ue);
596}
597
/* Counterpart of rb_enc_code_to_mbclen(); per that function's description on
 * this page, the two differ in how invalid code points are handled
 * (rb_enc_code_to_mbclen() returns 0 for them). */
607int rb_enc_codelen(int code, rb_encoding *enc);
608
/*
 * Body of rb_enc_code_to_mbclen(int c, rb_encoding *enc); this page's symbol
 * index places that definition at encoding.h:618.
 * NOTE(review): the listing omits source line 618 (the declarator), so the
 * name and parameters above come from the index entry, not from these lines.
 *
 * Converts `c` to OnigCodePoint and returns ONIGENC_CODE_TO_MBCLEN(enc, uc).
 * The index describes it as identical to rb_enc_codelen(), except that it
 * returns 0 for invalid code points.
 */
617static inline int
619{
620 OnigCodePoint uc = RBIMPL_CAST((OnigCodePoint)c);
621
622 return ONIGENC_CODE_TO_MBCLEN(enc, uc);
623}
624
641static inline int
642rb_enc_mbcput(unsigned int c, void *buf, rb_encoding *enc)
643{
644 OnigCodePoint uc = RBIMPL_CAST((OnigCodePoint)c);
645 OnigUChar *ubuf = RBIMPL_CAST((OnigUChar *)buf);
646
647 return ONIGENC_CODE_TO_MBC(enc, uc, ubuf);
648}
649
660static inline char *
661rb_enc_prev_char(const char *s, const char *p, const char *e, rb_encoding *enc)
662{
663 const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s);
664 const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
665 const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
666 OnigUChar *ur = onigenc_get_prev_char_head(enc, us, up, ue);
667
668 return RBIMPL_CAST((char *)ur);
669}
670
681static inline char *
682rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
683{
684 const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s);
685 const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
686 const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
687 OnigUChar *ur = onigenc_get_left_adjust_char_head(enc, us, up, ue);
688
689 return RBIMPL_CAST((char *)ur);
690}
691
702static inline char *
703rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
704{
705 const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s);
706 const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
707 const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
708 OnigUChar *ur = onigenc_get_right_adjust_char_head(enc, us, up, ue);
709
710 return RBIMPL_CAST((char *)ur);
711}
712
724static inline char *
725rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
726{
727 const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s);
728 const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
729 const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
730 const OnigUChar *ur = onigenc_step_back(enc, us, up, ue, n);
731
732 return RBIMPL_CAST((char *)ur);
733}
734
745static inline int
746rb_enc_asciicompat_inline(rb_encoding *enc)
747{
748 return rb_enc_mbminlen(enc)==1 && !rb_enc_dummy_p(enc);
749}
750
766static inline bool
767rb_enc_asciicompat(rb_encoding *enc)
768{
769 if (rb_enc_mbminlen(enc) != 1) {
770 return false;
771 }
772 else if (rb_enc_dummy_p(enc)) {
773 return false;
774 }
775 else {
776 return true;
777 }
778}
779
/*
 * Body of rb_enc_str_asciicompat_p(VALUE str); this page's symbol index
 * places that definition at encoding.h:788.
 * NOTE(review): the listing omits source line 788 (the declarator), so the
 * name and parameter above come from the index entry, not from these lines.
 *
 * Obtains the encoding of `str` with rb_enc_get() and returns the result of
 * rb_enc_asciicompat() for it.
 */
787static inline bool
789{
790 rb_encoding *enc = rb_enc_get(str);
791
792 return rb_enc_asciicompat(enc);
793}
794
/* NOTE(review): no description is visible in this listing; presumably
 * returns the Ruby-level object that corresponds to `enc` -- confirm. */
803VALUE rb_enc_from_encoding(rb_encoding *enc);
804
821
833
845
857
871
882
891
900
/* Identical to rb_ascii8bit_encoding(), except it returns the encoding's
 * index instead of the encoding itself.  The prototype is skipped when a
 * macro named rb_ascii8bit_encindex is already defined. */
901#ifndef rb_ascii8bit_encindex
913int rb_ascii8bit_encindex(void);
914#endif
915
925static inline bool
930
/* Identical to rb_utf8_encoding(), except it returns the encoding's index
 * instead of the encoding itself.  The prototype is skipped when a macro
 * named rb_utf8_encindex is already defined. */
931#ifndef rb_utf8_encindex
939int rb_utf8_encindex(void);
940#endif
941
/* Identical to rb_usascii_encoding(), except it returns the encoding's index
 * instead of the encoding itself.  The prototype is skipped when a macro
 * named rb_usascii_encindex is already defined. */
942#ifndef rb_usascii_encindex
950int rb_usascii_encindex(void);
951#endif
952
/* Identical to rb_locale_encoding(), except it returns the encoding's index
 * instead of the encoding itself. */
959int rb_locale_encindex(void);
960
/* Identical to rb_filesystem_encoding(), except it returns the encoding's
 * index instead of the encoding itself. */
967int rb_filesystem_encindex(void);
968
977
986
997
/* Destructively assigns the passed encoding as the default internal
 * encoding. */
1007void rb_enc_set_default_internal(VALUE encoding);
1008
1019
1021
1022
/* Each inline function of this header is also defined as a macro that
 * expands to its own name.  The identifier still resolves to the function,
 * but `#ifdef NAME` / `defined(NAME)` now evaluates to true for it.
 * NOTE(review): presumably provided so that client code can feature-test
 * these names with the preprocessor -- confirm. */
1023#define RB_ENCODING_GET RB_ENCODING_GET
1024#define RB_ENCODING_GET_INLINED RB_ENCODING_GET_INLINED
1025#define RB_ENCODING_IS_ASCII8BIT RB_ENCODING_IS_ASCII8BIT
1026#define RB_ENCODING_SET RB_ENCODING_SET
1027#define RB_ENCODING_SET_INLINED RB_ENCODING_SET_INLINED
1028#define rb_enc_asciicompat rb_enc_asciicompat
1029#define rb_enc_code_to_mbclen rb_enc_code_to_mbclen
1030#define rb_enc_codepoint rb_enc_codepoint
1031#define rb_enc_left_char_head rb_enc_left_char_head
1032#define rb_enc_mbc_to_codepoint rb_enc_mbc_to_codepoint
1033#define rb_enc_mbcput rb_enc_mbcput
1034#define rb_enc_mbmaxlen rb_enc_mbmaxlen
1035#define rb_enc_mbminlen rb_enc_mbminlen
1036#define rb_enc_name rb_enc_name
1037#define rb_enc_prev_char rb_enc_prev_char
1038#define rb_enc_right_char_head rb_enc_right_char_head
1039#define rb_enc_step_back rb_enc_step_back
1040#define rb_enc_str_asciicompat_p rb_enc_str_asciicompat_p
1043#endif /* RUBY_INTERNAL_ENCODING_ENCODING_H */
ruby_coderange_type
What rb_enc_str_coderange() returns.
Definition coderange.h:33
static void RB_ENC_CODERANGE_SET(VALUE obj, enum ruby_coderange_type cr)
Destructively modifies the passed object so that its (inline) code range is the passed one.
Definition coderange.h:129
Defines RBIMPL_ATTR_CONST.
#define RBIMPL_ATTR_CONST()
Wraps (or simulates) __attribute__((const))
Definition const.h:36
Defines RBIMPL_ATTR_DEPRECATED.
Tweaking visibility of C variables/functions.
#define RUBY_EXTERN
Declaration of externally visible global variables.
Definition dllexport.h:45
#define RBIMPL_SYMBOL_EXPORT_END()
Counterpart of RBIMPL_SYMBOL_EXPORT_BEGIN.
Definition dllexport.h:74
#define RBIMPL_SYMBOL_EXPORT_BEGIN()
Shortcut macro equivalent to RUBY_SYMBOL_EXPORT_BEGIN extern "C" {.
Definition dllexport.h:65
Defines enum ruby_fl_type.
@ RUBY_FL_USHIFT
Number of bits in ruby_fl_type that are not open to users.
Definition fl_type.h:159
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
Definition fl_type.h:469
static void RB_FL_SET_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_SET().
Definition fl_type.h:606
static void RB_FL_UNSET_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_UNSET().
Definition fl_type.h:666
VALUE rb_cEncoding
Encoding class.
Definition encoding.c:57
int rb_enc_dummy_p(rb_encoding *enc)
Queries if the passed encoding is dummy.
Definition encoding.c:197
int rb_enc_get_index(VALUE obj)
Queries the index of the encoding of the passed object, if any.
Definition encoding.c:920
int rb_to_encoding_index(VALUE obj)
Obtains an encoding index from a wider range of objects (than rb_enc_find_index()).
Definition encoding.c:261
int rb_filesystem_encindex(void)
Identical to rb_filesystem_encoding(), except it returns the encoding's index instead of the encoding...
Definition encoding.c:1512
rb_encoding * rb_utf8_encoding(void)
Queries the encoding that represents UTF-8.
Definition encoding.c:1458
static void RB_ENCODING_SET_INLINED(VALUE obj, int encindex)
Destructively assigns the passed encoding to the passed object.
Definition encoding.h:80
rb_encoding * rb_ascii8bit_encoding(void)
Queries the encoding that represents ASCII-8BIT, a.k.a. binary.
Definition encoding.c:1446
static bool RB_ENCODING_IS_ASCII8BIT(VALUE obj)
Queries if the passed object is in ascii 8bit (== binary) encoding.
Definition encoding.h:926
rb_encoding * rb_to_encoding(VALUE obj)
Identical to rb_find_encoding(), except it raises an exception instead of returning NULL.
Definition encoding.c:323
const OnigEncodingType rb_encoding
The type of encoding.
Definition encoding.h:116
rb_encoding * rb_filesystem_encoding(void)
Queries the "filesystem" encoding.
Definition encoding.c:1520
rb_encoding * rb_default_internal_encoding(void)
Queries the "default internal" encoding.
Definition encoding.c:1659
void rb_enc_copy(VALUE dst, VALUE src)
Destructively copies the encoding of the latter object to that of former one.
Definition encoding.c:1133
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
Definition encoding.h:682
int rb_utf8_encindex(void)
Identical to rb_utf8_encoding(), except it returns the encoding's index instead of the encoding itsel...
Definition encoding.c:1464
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_mbclen() unless the character at p overruns e.
Definition encoding.c:1157
static int RB_ENCODING_GET(VALUE obj)
Just another name of rb_enc_get_index.
Definition encoding.h:194
int rb_ascii8bit_encindex(void)
Identical to rb_ascii8bit_encoding(), except it returns the encoding's index instead of the encoding ...
Definition encoding.c:1452
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
Definition encoding.c:1211
int rb_enc_unicode_p(rb_encoding *enc)
Queries if the passed encoding is either one of UTF-8/16/32.
Definition encoding.c:638
int rb_enc_to_index(rb_encoding *enc)
Queries the index of the encoding.
Definition encoding.c:191
void rb_enc_set_index(VALUE obj, int encindex)
Destructively assigns an encoding (via its index) to an object.
Definition encoding.c:971
VALUE rb_locale_charmap(VALUE klass)
Returns a platform-dependent "charmap" of the current locale.
Definition localeinit.c:91
void rb_enc_set_default_internal(VALUE encoding)
Destructively assigns the passed encoding as the default internal encoding.
Definition encoding.c:1709
VALUE rb_enc_default_external(void)
Identical to rb_default_external_encoding(), except it returns the Ruby-level counterpart instance of...
Definition encoding.c:1586
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
Definition encoding.h:703
rb_encoding * rb_enc_find(const char *name)
Identical to rb_find_encoding(), except it takes a C string instead of a Ruby object.
Definition encoding.c:859
rb_encoding * rb_find_encoding(VALUE obj)
Identical to rb_to_encoding_index(), except the return type.
Definition encoding.c:330
int rb_define_dummy_encoding(const char *name)
Creates a new "dummy" encoding.
Definition encoding.c:566
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
Definition encoding.h:570
rb_encoding * rb_default_external_encoding(void)
Queries the "default external" encoding.
Definition encoding.c:1572
int rb_locale_encindex(void)
Identical to rb_locale_encoding(), except it returns the encoding's index instead of the encoding its...
Definition encoding.c:1484
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
Definition encoding.h:446
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Identical to rb_enc_compatible(), except it raises an exception instead of returning NULL.
Definition encoding.c:1038
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Queries the number of bytes of the character at the passed pointer.
Definition encoding.c:1163
static void RB_ENCODING_SET(VALUE obj, int encindex)
Just another name of rb_enc_set_index.
Definition encoding.h:220
int rb_enc_capable(VALUE obj)
Queries if the passed object can have its encoding.
Definition encoding.c:884
static void RB_ENCODING_CODERANGE_SET(VALUE obj, int encindex, enum ruby_coderange_type cr)
This is RB_ENCODING_SET + RB_ENC_CODERANGE_SET combo.
Definition encoding.h:237
VALUE rb_enc_default_internal(void)
Identical to rb_default_internal_encoding(), except it returns the Ruby-level counterpart instance of...
Definition encoding.c:1668
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
Definition encoding.h:98
rb_encoding * rb_locale_encoding(void)
Queries the encoding that represents the current locale.
Definition encoding.c:1506
static OnigCodePoint rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_codepoint(), except it assumes the passed character is not broken.
Definition encoding.h:590
rb_encoding * rb_usascii_encoding(void)
Queries the encoding that represents US-ASCII.
Definition encoding.c:1470
void rb_enc_set_default_external(VALUE encoding)
Destructively assigns the passed encoding as the default external encoding.
Definition encoding.c:1626
static int rb_enc_mbminlen(rb_encoding *enc)
Queries the minimum number of bytes that the passed encoding needs to represent a character.
Definition encoding.h:431
int rb_enc_alias(const char *alias, const char *orig)
Registers an "alias" name.
Definition encoding.c:670
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
Definition encoding.h:618
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
Definition encoding.h:725
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
Definition encoding.c:1187
static bool rb_enc_str_asciicompat_p(VALUE str)
Queries if the passed string is in an ASCII-compatible encoding.
Definition encoding.h:788
int rb_usascii_encindex(void)
Identical to rb_usascii_encoding(), except it returns the encoding's index instead of the encoding it...
Definition encoding.c:1476
int len
Length of the buffer.
Definition io.h:8
Defines RBIMPL_ATTR_NOALIAS.
#define RBIMPL_ATTR_NOALIAS()
Wraps (or simulates) __declspec(noalias)
Definition noalias.h:66
#define inline
Old Visual Studio versions do not support the inline keyword, so we need to define it to be __inline.
Definition defines.h:66
Defines RBIMPL_ATTR_PURE.
#define RBIMPL_ATTR_PURE()
Wraps (or simulates) __attribute__((pure))
Definition pure.h:33
Defines struct RBasic.
Defines RBIMPL_ATTR_RETURNS_NONNULL.
#define RBIMPL_ATTR_RETURNS_NONNULL()
Wraps (or simulates) __attribute__((returns_nonnull))
Defines VALUE and ID.
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40