14#include "ruby/internal/config.h"
24#include "debug_counter.h"
28#include "internal/array.h"
29#include "internal/compar.h"
30#include "internal/compilers.h"
31#include "internal/encoding.h"
32#include "internal/error.h"
33#include "internal/gc.h"
34#include "internal/numeric.h"
35#include "internal/object.h"
36#include "internal/proc.h"
37#include "internal/re.h"
38#include "internal/sanitizers.h"
39#include "internal/string.h"
40#include "internal/transcode.h"
45#include "ruby_assert.h"
48#if defined HAVE_CRYPT_R
49# if defined HAVE_CRYPT_H
52#elif !defined HAVE_CRYPT
53# include "missing/crypt.h"
54# define HAVE_CRYPT_R 1
/* Shorthand accessors for entry `no` of regs->beg and regs->end.
 * Both expand to an access through an identifier named `regs`, so a
 * suitable `regs` pointer must be in scope wherever they are used. */
57#define BEG(no) (regs->beg[(no)])
58#define END(no) (regs->end[(no)])
61#undef rb_usascii_str_new
65#undef rb_usascii_str_new_cstr
66#undef rb_utf8_str_new_cstr
67#undef rb_enc_str_new_cstr
68#undef rb_external_str_new_cstr
69#undef rb_locale_str_new_cstr
70#undef rb_str_dup_frozen
71#undef rb_str_buf_new_cstr
/* NOTE(review): presumably an upper bound on the byte length of a single
 * character; confirm against its users. */
101#define RUBY_MAX_CHAR_LEN 16
/* String-specific names for generic FL_USER* flag bits. */
/* Set by STR_SET_SHARED() on the string whose buffer is being shared. */
102#define STR_SHARED_ROOT FL_USER5
/* Set by STR_SET_SHARED() on a shared root whose RBASIC_CLASS() is 0. */
103#define STR_BORROWED FL_USER6
/* Tested by rb_check_lockedtmp(). */
104#define STR_TMPLOCK FL_USER7
/* Set by str_new_static(); str_discard() only frees the heap buffer when
 * neither STR_SHARED nor STR_NOFREE is set. */
105#define STR_NOFREE FL_USER18
/* STR_SET_SHARED() tests this flag on `str` before setting up sharing. */
106#define STR_FAKESTR FL_USER19
108#define STR_SET_NOEMBED(str) do {\
109 FL_SET((str), STR_NOEMBED);\
110 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
/* Clear both STR_NOEMBED and STR_NOFREE on `str`; no other field is touched. */
112#define STR_SET_EMBED(str) FL_UNSET((str), (STR_NOEMBED|STR_NOFREE))
114#define STR_SET_LEN(str, n) do { \
115 RSTRING(str)->len = (n); \
119str_enc_fastpath(
VALUE str)
123 case ENCINDEX_ASCII_8BIT:
125 case ENCINDEX_US_ASCII:
/* Terminator length for `str`: 1 when str_enc_fastpath(str) is non-zero,
 * otherwise rb_enc_mbminlen() of the encoding selected by ENCODING_GET(str).
 * Note that `str` is evaluated more than once. */
132#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
133#define TERM_FILL(ptr, termlen) do {\
134 char *const term_fill_ptr = (ptr);\
135 const int term_fill_len = (termlen);\
136 *term_fill_ptr = '\0';\
137 if (UNLIKELY(term_fill_len > 1))\
138 memset(term_fill_ptr, 0, term_fill_len);\
141#define RESIZE_CAPA(str,capacity) do {\
142 const int termlen = TERM_LEN(str);\
143 RESIZE_CAPA_TERM(str,capacity,termlen);\
145#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
146 if (STR_EMBED_P(str)) {\
147 if (str_embed_capa(str) < capacity + termlen) {\
148 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
149 const long tlen = RSTRING_LEN(str);\
150 memcpy(tmp, RSTRING_PTR(str), tlen);\
151 RSTRING(str)->as.heap.ptr = tmp;\
152 RSTRING(str)->len = tlen;\
153 STR_SET_NOEMBED(str);\
154 RSTRING(str)->as.heap.aux.capa = (capacity);\
158 assert(!FL_TEST((str), STR_SHARED)); \
159 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
160 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
161 RSTRING(str)->as.heap.aux.capa = (capacity);\
165#define STR_SET_SHARED(str, shared_str) do { \
166 if (!FL_TEST(str, STR_FAKESTR)) { \
167 assert(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
168 assert(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
169 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
170 FL_SET((str), STR_SHARED); \
171 FL_SET((shared_str), STR_SHARED_ROOT); \
172 if (RBASIC_CLASS((shared_str)) == 0) \
173 FL_SET_RAW((shared_str), STR_BORROWED); \
/* The heap buffer pointer field of `str` (as.heap.ptr); usable as an lvalue. */
177#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/* as.heap.aux.capa plus TERM_LEN(str), as a size_t.  This is the size passed
 * as the old/allocated size to SIZED_REALLOC_N() and ruby_sized_xfree(). */
178#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Alias for get_encoding(str). */
181#define STR_ENC_GET(str) get_encoding(str)
183#if !defined SHARABLE_MIDDLE_SUBSTRING
184# define SHARABLE_MIDDLE_SUBSTRING 0
/* SHARABLE_SUBSTRING_P(beg, len, end): predicate consulted by str_subseq()
 * before it shares the source buffer.  When SHARABLE_MIDDLE_SUBSTRING is 0
 * it holds only if (beg) + (len) == (end), i.e. the substring runs up to
 * `end`; the alternative definition is unconditionally 1. */
186#if !SHARABLE_MIDDLE_SUBSTRING
187#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
189#define SHARABLE_SUBSTRING_P(beg, len, end) 1
194str_embed_capa(
VALUE str)
196 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
200rb_str_reembeddable_p(
VALUE str)
202 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
206rb_str_embed_size(
long capa)
212rb_str_size_as_embedded(
VALUE str)
215 if (STR_EMBED_P(str)) {
216 real_size = rb_str_embed_size(
RSTRING(str)->
len) + TERM_LEN(str);
220 else if (rb_str_reembeddable_p(str)) {
221 real_size = rb_str_embed_size(
RSTRING(str)->as.heap.aux.capa) + TERM_LEN(str);
224 real_size =
sizeof(
struct RString);
230STR_EMBEDDABLE_P(
long len,
long termlen)
232 return rb_gc_size_allocatable_p(rb_str_embed_size(
len + termlen));
/* Forward declarations of file-local helpers, so they can be called before
 * their definitions further down in this file. */
237static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
238static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
240static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
241static inline void str_modifiable(
VALUE str);
245str_make_independent(
VALUE str)
247 long len = RSTRING_LEN(str);
248 int termlen = TERM_LEN(str);
249 str_make_independent_expand((str),
len, 0L, termlen);
252static inline int str_dependent_p(
VALUE str);
255rb_str_make_independent(
VALUE str)
257 if (str_dependent_p(str)) {
258 str_make_independent(str);
263rb_str_make_embedded(
VALUE str)
268 char *buf =
RSTRING(str)->as.heap.ptr;
272 STR_SET_LEN(str,
len);
275 memcpy(RSTRING_PTR(str), buf,
len);
279 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
283rb_debug_rstring_null_ptr(
const char *func)
285 fprintf(stderr,
"%s is returning NULL!! "
286 "SIGSEGV is highly expected to follow immediately.\n"
287 "If you could reproduce, attach your debugger here, "
288 "and look at the passed string.\n",
293static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
296get_encoding(
VALUE str)
302mustnot_broken(
VALUE str)
304 if (is_broken_string(str)) {
305 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
310mustnot_wchar(
VALUE str)
314 rb_raise(rb_eArgError,
"wide char encoding: %s", rb_enc_name(enc));
/* Forward declaration; the definition appears later in this file and is
 * called with `copy` set to FALSE by the rb_fstring_* constructors. */
320static VALUE register_fstring(
VALUE str,
bool copy);
/* True when `str` does not have FL_EXIVAR set and its class is exactly
 * rb_cString.  `str` is evaluated twice. */
327#define BARE_STRING_P(str) (!FL_ANY_RAW(str, FL_EXIVAR) && RBASIC_CLASS(str) == rb_cString)
335fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t data,
int existing)
345 if (rb_objspace_garbage_object_p(str)) {
357 rb_enc_copy(new_str, str);
370 if (STR_SHARED_P(str)) {
372 str_make_independent(str);
375 if (!BARE_STRING_P(str)) {
379 RBASIC(str)->flags |= RSTRING_FSTR;
381 *key = *value = arg->fstr = str;
395 if (
FL_TEST(str, RSTRING_FSTR))
398 bare = BARE_STRING_P(str);
400 if (STR_EMBED_P(str)) {
405 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
412 rb_str_resize(str, RSTRING_LEN(str));
414 fstr = register_fstring(str, FALSE);
417 str_replace_shared_without_enc(str, fstr);
425register_fstring(
VALUE str,
bool copy)
432 st_table *frozen_strings = rb_vm_fstring_table();
435 st_update(frozen_strings, (st_data_t)str, fstr_update_callback, (st_data_t)&args);
436 }
while (UNDEF_P(args.fstr));
448setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
464 return (
VALUE)fake_str;
473 return setup_fake_str(fake_str, name,
len, rb_enc_to_index(enc));
482rb_fstring_new(
const char *ptr,
long len)
485 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII), FALSE);
492 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc), FALSE);
496rb_fstring_cstr(
const char *
ptr)
498 return rb_fstring_new(
ptr, strlen(
ptr));
502fstring_set_class_i(st_data_t key, st_data_t val, st_data_t arg)
512 const char *aptr, *bptr;
515 return (alen != blen ||
517 memcmp(aptr, bptr, alen) != 0);
521single_byte_optimizable(
VALUE str)
529 enc = STR_ENC_GET(str);
540static inline const char *
541search_nonascii(
const char *p,
const char *e)
543 const uintptr_t *s, *t;
545#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
546# if SIZEOF_UINTPTR_T == 8
547# define NONASCII_MASK UINT64_C(0x8080808080808080)
548# elif SIZEOF_UINTPTR_T == 4
549# define NONASCII_MASK UINT32_C(0x80808080)
551# error "don't know what to do."
554# if SIZEOF_UINTPTR_T == 8
555# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
556# elif SIZEOF_UINTPTR_T == 4
557# define NONASCII_MASK 0x80808080UL
559# error "don't know what to do."
563 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
564#if !UNALIGNED_WORD_ACCESS
565 if ((uintptr_t)p % SIZEOF_VOIDP) {
566 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
571 case 7:
if (p[-7]&0x80)
return p-7;
572 case 6:
if (p[-6]&0x80)
return p-6;
573 case 5:
if (p[-5]&0x80)
return p-5;
574 case 4:
if (p[-4]&0x80)
return p-4;
576 case 3:
if (p[-3]&0x80)
return p-3;
577 case 2:
if (p[-2]&0x80)
return p-2;
578 case 1:
if (p[-1]&0x80)
return p-1;
583#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
584#define aligned_ptr(value) \
585 __builtin_assume_aligned((value), sizeof(uintptr_t))
587#define aligned_ptr(value) (uintptr_t *)(value)
590 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
593 if (*s & NONASCII_MASK) {
594#ifdef WORDS_BIGENDIAN
595 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
597 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
607 case 7:
if (e[-7]&0x80)
return e-7;
608 case 6:
if (e[-6]&0x80)
return e-6;
609 case 5:
if (e[-5]&0x80)
return e-5;
610 case 4:
if (e[-4]&0x80)
return e-4;
612 case 3:
if (e[-3]&0x80)
return e-3;
613 case 2:
if (e[-2]&0x80)
return e-2;
614 case 1:
if (e[-1]&0x80)
return e-1;
622 const char *e = p +
len;
624 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
626 p = search_nonascii(p, e);
630 if (rb_enc_asciicompat(enc)) {
631 p = search_nonascii(p, e);
634 int ret = rb_enc_precise_mbclen(p, e, enc);
638 p = search_nonascii(p, e);
644 int ret = rb_enc_precise_mbclen(p, e, enc);
660 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
663 p = search_nonascii(p, e);
667 else if (rb_enc_asciicompat(enc)) {
668 p = search_nonascii(p, e);
674 int ret = rb_enc_precise_mbclen(p, e, enc);
681 p = search_nonascii(p, e);
687 int ret = rb_enc_precise_mbclen(p, e, enc);
712 rb_enc_set_index(str1, rb_enc_get_index(str2));
720rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
725 str_enc_copy(dest, src);
726 if (RSTRING_LEN(dest) == 0) {
727 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
738 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
739 search_nonascii(RSTRING_PTR(dest), RSTRING_END(dest)))
750rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
752 str_enc_copy(dest, src);
759 return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
765 return enc_coderange_scan(str, enc);
774 cr = enc_coderange_scan(str, get_encoding(str));
785 if (!rb_enc_asciicompat(enc))
787 else if (is_ascii_string(str))
793str_mod_check(
VALUE s,
const char *p,
long len)
795 if (RSTRING_PTR(s) != p || RSTRING_LEN(s) !=
len){
801str_capacity(
VALUE str,
const int termlen)
803 if (STR_EMBED_P(str)) {
804 return str_embed_capa(str) - termlen;
806 else if (
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
810 return RSTRING(str)->as.heap.aux.capa;
817 return str_capacity(str, TERM_LEN(str));
821must_not_null(
const char *
ptr)
824 rb_raise(rb_eArgError,
"NULL pointer given");
831 size_t size = rb_str_embed_size(
capa);
833 assert(rb_gc_size_allocatable_p(size));
842str_alloc_heap(
VALUE klass)
851empty_str_alloc(
VALUE klass)
853 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
854 VALUE str = str_alloc_embed(klass, 0);
855 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
860str_new0(
VALUE klass,
const char *
ptr,
long len,
int termlen)
865 rb_raise(rb_eArgError,
"negative string size (or size too big)");
868 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
870 if (STR_EMBEDDABLE_P(
len, termlen)) {
871 str = str_alloc_embed(klass,
len + termlen);
877 str = str_alloc_heap(klass);
883 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
886 memcpy(RSTRING_PTR(str),
ptr,
len);
888 STR_SET_LEN(str,
len);
889 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
896 return str_new0(klass,
ptr,
len, 1);
917 rb_enc_associate_index(str, rb_utf8_encindex());
929 rb_enc_associate(str, enc);
941 __msan_unpoison_string(
ptr);
957 rb_enc_associate_index(str, rb_utf8_encindex());
966 rb_raise(rb_eArgError,
"wchar encoding given");
968 return rb_enc_str_new(
ptr, strlen(
ptr), enc);
972str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
977 rb_raise(rb_eArgError,
"negative string size (or size too big)");
981 rb_encoding *enc = rb_enc_get_from_index(encindex);
985 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
986 str = str_alloc_heap(klass);
990 RBASIC(str)->flags |= STR_NOFREE;
992 rb_enc_associate_index(str, encindex);
1020static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1022 int ecflags,
VALUE ecopts);
1027 int encidx = rb_enc_to_index(enc);
1028 if (rb_enc_get_index(str) == encidx)
1029 return is_ascii_string(str);
1040 if (!to)
return str;
1041 if (!from) from = rb_enc_get(str);
1042 if (from == to)
return str;
1043 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1044 rb_is_ascii8bit_enc(to)) {
1045 if (STR_ENC_GET(str) != to) {
1046 str = rb_str_dup(str);
1047 rb_enc_associate(str, to);
1053 newstr = str_cat_conv_enc_opts(rb_str_buf_new(
len), 0,
ptr,
len,
1054 from, to, ecflags, ecopts);
1055 if (
NIL_P(newstr)) {
1063rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1068 olen = RSTRING_LEN(newstr);
1069 if (ofs < -olen || olen < ofs)
1071 if (ofs < 0) ofs += olen;
1073 STR_SET_LEN(newstr, ofs);
1074 return rb_str_cat(newstr,
ptr,
len);
1077 rb_str_modify(newstr);
1078 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1086 STR_SET_LEN(str, 0);
1087 rb_enc_associate(str, enc);
1088 rb_str_cat(str,
ptr,
len);
1093str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1095 int ecflags,
VALUE ecopts)
1100 VALUE econv_wrapper;
1101 const unsigned char *start, *sp;
1102 unsigned char *dest, *dp;
1103 size_t converted_output = (size_t)ofs;
1108 RBASIC_CLEAR_CLASS(econv_wrapper);
1110 if (!ec)
return Qnil;
1113 sp = (
unsigned char*)
ptr;
1115 while ((dest = (
unsigned char*)RSTRING_PTR(newstr)),
1116 (dp = dest + converted_output),
1120 size_t converted_input = sp - start;
1121 size_t rest =
len - converted_input;
1122 converted_output = dp - dest;
1123 rb_str_set_len(newstr, converted_output);
1124 if (converted_input && converted_output &&
1125 rest < (LONG_MAX / converted_output)) {
1126 rest = (rest * converted_output) / converted_input;
1131 olen += rest < 2 ? 2 : rest;
1132 rb_str_resize(newstr, olen);
1138 len = dp - (
unsigned char*)RSTRING_PTR(newstr);
1139 rb_str_set_len(newstr,
len);
1140 rb_enc_associate(newstr, to);
1159 const int eidx = rb_enc_to_index(eenc);
1162 return rb_enc_str_new(
ptr,
len, eenc);
1166 if ((eidx == rb_ascii8bit_encindex()) ||
1167 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1171 ienc = rb_default_internal_encoding();
1172 if (!ienc || eenc == ienc) {
1173 return rb_enc_str_new(
ptr,
len, eenc);
1177 if ((eidx == rb_ascii8bit_encindex()) ||
1178 (eidx == rb_usascii_encindex()) ||
1179 (rb_enc_asciicompat(eenc) && !search_nonascii(
ptr,
ptr +
len))) {
1180 return rb_enc_str_new(
ptr,
len, ienc);
1183 str = rb_enc_str_new(NULL, 0, ienc);
1186 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1187 rb_str_initialize(str,
ptr,
len, eenc);
1195 int eidx = rb_enc_to_index(eenc);
1196 if (eidx == rb_usascii_encindex() &&
1197 !is_ascii_string(str)) {
1198 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1201 rb_enc_associate_index(str, eidx);
1236rb_filesystem_str_new_cstr(
const char *
ptr)
1260str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1262 const int termlen = TERM_LEN(str);
1267 if (str_embed_capa(str2) >=
len + termlen) {
1268 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1269 STR_SET_EMBED(str2);
1270 memcpy(ptr2, RSTRING_PTR(str),
len);
1271 TERM_FILL(ptr2+
len, termlen);
1275 if (STR_SHARED_P(str)) {
1276 root =
RSTRING(str)->as.heap.aux.shared;
1280 root = rb_str_new_frozen(str);
1284 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1286 rb_fatal(
"about to free a possible shared root");
1288 char *ptr2 = STR_HEAP_PTR(str2);
1290 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1293 FL_SET(str2, STR_NOEMBED);
1295 STR_SET_SHARED(str2, root);
1298 STR_SET_LEN(str2,
len);
1306 str_replace_shared_without_enc(str2, str);
1307 rb_enc_cr_str_exact_copy(str2, str);
1314 return str_replace_shared(str_alloc_heap(klass), str);
1331rb_str_new_frozen_String(
VALUE orig)
1338rb_str_tmp_frozen_acquire(
VALUE orig)
1341 return str_new_frozen_buffer(0, orig, FALSE);
1345rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1347 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1348 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1350 VALUE str = str_alloc_heap(0);
1353 FL_SET(str, STR_SHARED_ROOT);
1355 size_t capa = str_capacity(orig, TERM_LEN(orig));
1361 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT)) {
1362 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1369 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1370 RBASIC(orig)->flags &= ~STR_NOFREE;
1371 STR_SET_SHARED(orig, str);
1381rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1386 if (STR_EMBED_P(tmp)) {
1395 assert(RSTRING_LEN(orig) == RSTRING_LEN(tmp));
1399 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1400 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1405 STR_SET_LEN(tmp, 0);
1413 return str_new_frozen_buffer(klass, orig, TRUE);
1419 assert(!STR_EMBED_P(orig));
1420 assert(!STR_SHARED_P(orig));
1422 VALUE str = str_alloc_heap(klass);
1423 STR_SET_LEN(str, RSTRING_LEN(orig));
1424 RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
1425 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1426 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1427 RBASIC(orig)->flags &= ~STR_NOFREE;
1428 STR_SET_SHARED(orig, str);
1435str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1439 long len = RSTRING_LEN(orig);
1440 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1442 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1443 str = str_new0(klass, RSTRING_PTR(orig),
len, termlen);
1444 assert(STR_EMBED_P(str));
1449 long ofs =
RSTRING(orig)->as.heap.ptr - RSTRING_PTR(
shared);
1450 long rest = RSTRING_LEN(
shared) - ofs - RSTRING_LEN(orig);
1453 assert(ofs + rest <= RSTRING_LEN(
shared));
1456 if ((ofs > 0) || (rest > 0) ||
1459 str = str_new_shared(klass,
shared);
1460 assert(!STR_EMBED_P(str));
1461 RSTRING(str)->as.heap.ptr += ofs;
1462 STR_SET_LEN(str, RSTRING_LEN(str) - (ofs + rest));
1470 else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
1471 str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
1473 memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
1474 STR_SET_LEN(str, RSTRING_LEN(orig));
1475 TERM_FILL(RSTRING_END(str), TERM_LEN(orig));
1478 str = heap_str_make_shared(klass, orig);
1482 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1494str_new_empty_String(
VALUE str)
1497 rb_enc_copy(v, str);
/* Floor for buffer capacities: a requested `capa` smaller than this is
 * raised to it (see the `capacity` keyword handling in the initializer). */
1501#define STR_BUF_MIN_SIZE 63
1506 if (STR_EMBEDDABLE_P(
capa, 1)) {
1514 RSTRING(str)->as.heap.ptr[0] =
'\0';
1525 str = rb_str_buf_new(
len);
1534 return str_new(0, 0,
len);
1540 if (
FL_TEST(str, RSTRING_FSTR)) {
1541 st_data_t fstr = (st_data_t)str;
1545 st_delete(rb_vm_fstring_table(), &fstr, NULL);
1546 RB_DEBUG_COUNTER_INC(obj_str_fstr);
1551 if (STR_EMBED_P(str)) {
1552 RB_DEBUG_COUNTER_INC(obj_str_embed);
1554 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1555 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1556 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1559 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1560 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
1564RUBY_FUNC_EXPORTED
size_t
1565rb_str_memsize(
VALUE str)
1567 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1568 return STR_HEAP_SIZE(str);
1578 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
/* Forward declarations of file-local helpers used before their definitions.
 * NOTE(review): str_shared_replace() is invoked only when str != str2 at the
 * call site just below; confirm the definition relies on that. */
1581static inline void str_discard(
VALUE str);
1582static void str_shared_replace(
VALUE str,
VALUE str2);
1587 if (str != str2) str_shared_replace(str, str2);
1598 enc = STR_ENC_GET(str2);
1603 STR_SET_LEN(str, RSTRING_LEN(str2));
1605 if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
1607 memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (
size_t)RSTRING_LEN(str2) + termlen);
1608 rb_enc_associate(str, enc);
1612 if (STR_EMBED_P(str2)) {
1613 assert(!
FL_TEST(str2, STR_SHARED));
1614 long len = RSTRING_LEN(str2);
1615 assert(
len + termlen <= str_embed_capa(str2));
1617 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1618 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1619 RSTRING(str2)->as.heap.ptr = new_ptr;
1620 STR_SET_LEN(str2,
len);
1622 STR_SET_NOEMBED(str2);
1625 STR_SET_NOEMBED(str);
1627 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1629 if (
FL_TEST(str2, STR_SHARED)) {
1631 STR_SET_SHARED(str,
shared);
1634 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1638 STR_SET_EMBED(str2);
1639 RSTRING_PTR(str2)[0] = 0;
1640 STR_SET_LEN(str2, 0);
1641 rb_enc_associate(str, enc);
1655 return rb_obj_as_string_result(str, obj);
1671 len = RSTRING_LEN(str2);
1672 if (STR_SHARED_P(str2)) {
1675 STR_SET_NOEMBED(str);
1676 STR_SET_LEN(str,
len);
1677 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1678 STR_SET_SHARED(str,
shared);
1679 rb_enc_cr_str_exact_copy(str, str2);
1682 str_replace_shared(str, str2);
1691 size_t size = rb_str_embed_size(
capa);
1693 assert(rb_gc_size_allocatable_p(size));
1713 const VALUE flag_mask =
1719 if (STR_EMBED_P(str)) {
1720 long len = RSTRING_LEN(str);
1722 assert(STR_EMBED_P(dup));
1723 assert(str_embed_capa(dup) >=
len + 1);
1729 root =
RSTRING(str)->as.heap.aux.shared;
1731 else if (UNLIKELY(!(flags &
FL_FREEZE))) {
1732 root = str = str_new_frozen(klass, str);
1735 assert(!STR_SHARED_P(root));
1738 RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
1739 FL_SET(root, STR_SHARED_ROOT);
1741 flags |= RSTRING_NOEMBED | STR_SHARED;
1744 STR_SET_LEN(dup, RSTRING_LEN(str));
1747 encidx = rb_enc_get_index(str);
1748 flags &= ~ENCODING_MASK;
1751 if (encidx) rb_enc_associate_index(dup, encidx);
1759 if (STR_EMBED_P(str)) {
1760 dup = ec_str_alloc_embed(ec, klass, RSTRING_LEN(str) + TERM_LEN(str));
1763 dup = ec_str_alloc_heap(ec, klass);
1766 return str_duplicate_setup(klass, str, dup);
1773 if (STR_EMBED_P(str)) {
1774 dup = str_alloc_embed(klass, RSTRING_LEN(str) + TERM_LEN(str));
1777 dup = str_alloc_heap(klass);
1780 return str_duplicate_setup(klass, str, dup);
1791rb_str_dup_m(
VALUE str)
1793 if (LIKELY(BARE_STRING_P(str))) {
1804 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
1811 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
1812 return ec_str_duplicate(ec,
rb_cString, str);
1827 static ID keyword_ids[2];
1828 VALUE orig, opt, venc, vcapa;
1833 if (!keyword_ids[0]) {
1834 keyword_ids[0] = rb_id_encoding();
1835 CONST_ID(keyword_ids[1],
"capacity");
1843 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
1844 enc = rb_to_encoding(venc);
1846 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
1851 if (
capa < STR_BUF_MIN_SIZE) {
1852 capa = STR_BUF_MIN_SIZE;
1856 len = RSTRING_LEN(orig);
1860 if (orig == str) n = 0;
1862 str_modifiable(str);
1863 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
1865 const size_t size = (size_t)
capa + termlen;
1866 const char *
const old_ptr = RSTRING_PTR(str);
1867 const size_t osize = RSTRING_LEN(str) + TERM_LEN(str);
1868 char *new_ptr =
ALLOC_N(
char, size);
1869 if (STR_EMBED_P(str))
RUBY_ASSERT(osize <= str_embed_capa(str));
1870 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
1872 RSTRING(str)->as.heap.ptr = new_ptr;
1874 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
1875 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
1876 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
1878 STR_SET_LEN(str,
len);
1881 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
1882 rb_enc_cr_str_exact_copy(str, orig);
1884 FL_SET(str, STR_NOEMBED);
1891 rb_enc_associate(str, enc);
1903rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
1909 static ID keyword_ids[2];
1919 keyword_ids[0] = rb_id_encoding();
1920 CONST_ID(keyword_ids[1],
"capacity");
1922 encoding = kwargs[0];
1923 capacity = kwargs[1];
1934 if (UNDEF_P(encoding)) {
1936 encoding = rb_obj_encoding(orig);
1940 if (!UNDEF_P(encoding)) {
1941 enc = rb_to_encoding(encoding);
1946 if (UNDEF_P(capacity)) {
1948 VALUE empty_str = str_new(klass,
"", 0);
1950 rb_enc_associate(empty_str, enc);
1954 VALUE copy = str_duplicate(klass, orig);
1955 rb_enc_associate(copy, enc);
1968 if (orig_capa >
capa) {
1973 VALUE str = str_new0(klass, NULL,
capa, termlen);
1974 STR_SET_LEN(str, 0);
1975 TERM_FILL(RSTRING_PTR(str), termlen);
1978 rb_enc_associate(str, enc);
1982 rb_str_buf_append(str, orig);
/* True unless the top two bits of the low byte of `c` are 10, i.e. unless
 * `c` has the 10xxxxxx bit pattern of a UTF-8 continuation byte.  Plain
 * ASCII bytes therefore also count as "lead" bytes. */
1989#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
2004static inline uintptr_t
2005count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2010 d = (d>>6) | (~d>>7);
2011 d &= NONASCII_MASK >> 7;
2014#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2016 return rb_popcount_intptr(d);
2020# if SIZEOF_VOIDP == 8
2029enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
2035 long diff = (long)(e - p);
2041 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2042 const uintptr_t *s, *t;
2043 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2044 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2045 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2046 while (p < (
const char *)s) {
2047 if (is_utf8_lead_byte(*p))
len++;
2051 len += count_utf8_lead_bytes_with_word(s);
2054 p = (
const char *)s;
2057 if (is_utf8_lead_byte(*p))
len++;
2063 else if (rb_enc_asciicompat(enc)) {
2068 q = search_nonascii(p, e);
2074 p += rb_enc_fast_mbclen(p, e, enc);
2081 q = search_nonascii(p, e);
2087 p += rb_enc_mbclen(p, e, enc);
2094 for (c=0; p<e; c++) {
2095 p += rb_enc_mbclen(p, e, enc);
2110rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2118 long diff = (long)(e - p);
2121 else if (rb_enc_asciicompat(enc)) {
2125 q = search_nonascii(p, e);
2133 ret = rb_enc_precise_mbclen(p, e, enc);
2148 for (c=0; p<e; c++) {
2149 ret = rb_enc_precise_mbclen(p, e, enc);
2173 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2174 if (!enc) enc = STR_ENC_GET(str);
2175 p = RSTRING_PTR(str);
2176 e = RSTRING_END(str);
2180 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2185 return enc_strlen(p, e, enc, cr);
2192 return str_strlen(str, NULL);
2206 return LONG2NUM(str_strlen(str, NULL));
2218rb_str_bytesize(
VALUE str)
2236rb_str_empty(
VALUE str)
2238 return RBOOL(RSTRING_LEN(str) == 0);
2256 char *ptr1, *ptr2, *ptr3;
2261 enc = rb_enc_check_str(str1, str2);
2265 if (len1 > LONG_MAX - len2) {
2266 rb_raise(rb_eArgError,
"string size too big");
2268 str3 = str_new0(
rb_cString, 0, len1+len2, termlen);
2269 ptr3 = RSTRING_PTR(str3);
2270 memcpy(ptr3, ptr1, len1);
2271 memcpy(ptr3+len1, ptr2, len2);
2272 TERM_FILL(&ptr3[len1+len2], termlen);
2288 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2291 int enc1 = rb_enc_get_index(str1);
2292 int enc2 = rb_enc_get_index(str2);
2297 else if (enc2 < 0) {
2300 else if (enc1 != enc2) {
2303 else if (len1 > LONG_MAX - len2) {
2336 rb_enc_copy(str2, str);
2341 rb_raise(rb_eArgError,
"negative argument");
2343 if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
2344 if (STR_EMBEDDABLE_P(
len, 1)) {
2346 memset(RSTRING_PTR(str2), 0,
len + 1);
2353 STR_SET_LEN(str2,
len);
2354 rb_enc_copy(str2, str);
2357 if (
len && LONG_MAX/
len < RSTRING_LEN(str)) {
2358 rb_raise(rb_eArgError,
"argument too big");
2361 len *= RSTRING_LEN(str);
2362 termlen = TERM_LEN(str);
2364 ptr2 = RSTRING_PTR(str2);
2366 n = RSTRING_LEN(str);
2367 memcpy(ptr2, RSTRING_PTR(str), n);
2368 while (n <=
len/2) {
2369 memcpy(ptr2 + n, ptr2, n);
2372 memcpy(ptr2 + n, ptr2,
len-n);
2374 STR_SET_LEN(str2,
len);
2375 TERM_FILL(&ptr2[
len], termlen);
2376 rb_enc_cr_str_copy_for_substr(str2, str);
2402 VALUE tmp = rb_check_array_type(arg);
2411rb_check_lockedtmp(
VALUE str)
2413 if (
FL_TEST(str, STR_TMPLOCK)) {
2419str_modifiable(
VALUE str)
2421 rb_check_lockedtmp(str);
2426str_dependent_p(
VALUE str)
2428 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2437str_independent(
VALUE str)
2439 str_modifiable(str);
2440 return !str_dependent_p(str);
2444str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2452 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2457 STR_SET_LEN(str,
len);
2462 oldptr = RSTRING_PTR(str);
2464 memcpy(
ptr, oldptr,
len);
2466 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2469 STR_SET_NOEMBED(str);
2470 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2471 TERM_FILL(
ptr +
len, termlen);
2473 STR_SET_LEN(str,
len);
2480 if (!str_independent(str))
2481 str_make_independent(str);
2488 int termlen = TERM_LEN(str);
2489 long len = RSTRING_LEN(str);
2492 rb_raise(rb_eArgError,
"negative expanding string size");
2494 if (expand >= LONG_MAX -
len) {
2495 rb_raise(rb_eArgError,
"string size too big");
2498 if (!str_independent(str)) {
2499 str_make_independent_expand(str,
len, expand, termlen);
2501 else if (expand > 0) {
2502 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2509str_modify_keep_cr(
VALUE str)
2511 if (!str_independent(str))
2512 str_make_independent(str);
2519str_discard(
VALUE str)
2521 str_modifiable(str);
2522 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2523 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2524 RSTRING(str)->as.heap.ptr = 0;
2525 STR_SET_LEN(str, 0);
2536 if (!rb_enc_asciicompat(enc)) {
2556 return RSTRING_PTR(str);
2560zero_filled(
const char *s,
int n)
2562 for (; n > 0; --n) {
2569str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2571 const char *e = s +
len;
2573 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
2574 if (zero_filled(s, minlen))
return s;
2580str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2585 if (str_dependent_p(str)) {
2586 if (!zero_filled(s +
len, termlen))
2587 str_make_independent_expand(str,
len, 0L, termlen);
2590 TERM_FILL(s +
len, termlen);
2593 return RSTRING_PTR(str);
2597rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2599 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2600 long len = RSTRING_LEN(str);
2604 rb_check_lockedtmp(str);
2605 str_make_independent_expand(str,
len, 0L, termlen);
2607 else if (str_dependent_p(str)) {
2608 if (termlen > oldtermlen)
2609 str_make_independent_expand(str,
len, 0L, termlen);
2612 if (!STR_EMBED_P(str)) {
2614 assert(!
FL_TEST((str), STR_SHARED));
2617 if (termlen > oldtermlen) {
2618 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
2626str_null_check(
VALUE str,
int *w)
2628 char *s = RSTRING_PTR(str);
2629 long len = RSTRING_LEN(str);
2635 if (str_null_char(s,
len, minlen, enc)) {
2638 return str_fill_term(str, s,
len, minlen);
2641 if (!s || memchr(s, 0,
len)) {
2645 s = str_fill_term(str, s,
len, minlen);
2651rb_str_to_cstr(
VALUE str)
2654 return str_null_check(str, &w);
2662 char *s = str_null_check(str, &w);
2665 rb_raise(rb_eArgError,
"string contains null char");
2667 rb_raise(rb_eArgError,
"string contains null byte");
2673rb_str_fill_terminator(
VALUE str,
const int newminlen)
2675 char *s = RSTRING_PTR(str);
2676 long len = RSTRING_LEN(str);
2677 return str_fill_term(str, s,
len, newminlen);
2683 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2707str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2716 else if (rb_enc_asciicompat(enc)) {
2717 const char *p2, *e2;
2720 while (p < e && 0 < nth) {
2727 p2 = search_nonascii(p, e2);
2736 n = rb_enc_mbclen(p, e, enc);
2747 while (p < e && nth--) {
2748 p += rb_enc_mbclen(p, e, enc);
2759 return str_nth_len(p, e, &nth, enc);
2763str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2768 p = str_nth_len(p, e, &nth, enc);
2777str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2779 const char *pp = str_nth(p, e, nth, enc, singlebyte);
2780 if (!pp)
return e - p;
2787 return str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
2788 STR_ENC_GET(str), single_byte_optimizable(str));
2793str_utf8_nth(
const char *p,
const char *e,
long *nthp)
2796 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
2797 const uintptr_t *s, *t;
2798 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
2799 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2800 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2801 while (p < (
const char *)s) {
2802 if (is_utf8_lead_byte(*p)) nth--;
2806 nth -= count_utf8_lead_bytes_with_word(s);
2808 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
2812 if (is_utf8_lead_byte(*p)) {
2813 if (nth == 0)
break;
2823str_utf8_offset(
const char *p,
const char *e,
long nth)
2825 const char *pp = str_utf8_nth(p, e, &nth);
2834 if (single_byte_optimizable(str) || pos < 0)
2837 char *p = RSTRING_PTR(str);
2838 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
2843str_subseq(
VALUE str,
long beg,
long len)
2849 assert(beg+
len <= RSTRING_LEN(str));
2851 const int termlen = TERM_LEN(str);
2852 if (!SHARABLE_SUBSTRING_P(beg,
len, RSTRING_LEN(str))) {
2859 if (str_embed_capa(str2) >=
len + termlen) {
2860 char *ptr2 =
RSTRING(str2)->as.embed.ary;
2861 STR_SET_EMBED(str2);
2862 memcpy(ptr2, RSTRING_PTR(str) + beg,
len);
2863 TERM_FILL(ptr2+
len, termlen);
2865 STR_SET_LEN(str2,
len);
2869 str_replace_shared(str2, str);
2870 assert(!STR_EMBED_P(str2));
2872 RSTRING(str2)->as.heap.ptr += beg;
2873 if (RSTRING_LEN(str2) >
len) {
2874 STR_SET_LEN(str2,
len);
2884 VALUE str2 = str_subseq(str, beg,
len);
2885 rb_enc_cr_str_copy_for_substr(str2, str);
2894 long blen = RSTRING_LEN(str);
2896 char *p, *s = RSTRING_PTR(str), *e = s + blen;
2898 if (
len < 0)
return 0;
2902 if (single_byte_optimizable(str)) {
2903 if (beg > blen)
return 0;
2906 if (beg < 0)
return 0;
2908 if (
len > blen - beg)
2910 if (
len < 0)
return 0;
2915 if (
len > -beg)
len = -beg;
2918 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
2921 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
2927 slen = str_strlen(str, enc);
2929 if (beg < 0)
return 0;
2931 if (
len == 0)
goto end;
2934 else if (beg > 0 && beg > RSTRING_LEN(str)) {
2938 if (beg > str_strlen(str, enc))
return 0;
2943 enc == rb_utf8_encoding()) {
2944 p = str_utf8_nth(s, e, &beg);
2945 if (beg > 0)
return 0;
2946 len = str_utf8_offset(p, e,
len);
2952 p = s + beg * char_sz;
2956 else if (
len * char_sz > e - p)
2961 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
2962 if (beg > 0)
return 0;
2966 len = str_offset(p, e,
len, enc, 0);
2974static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
2979 return str_substr(str, beg,
len, TRUE);
2983str_substr(
VALUE str,
long beg,
long len,
int empty)
2987 if (!p)
return Qnil;
2988 if (!
len && !empty)
return Qnil;
2990 beg = p - RSTRING_PTR(str);
2992 VALUE str2 = str_subseq(str, beg,
len);
2993 rb_enc_cr_str_copy_for_substr(str2, str);
3002 rb_str_resize(str, RSTRING_LEN(str));
3003 return rb_obj_freeze(str);
3019 return rb_str_dup(str);
3048str_uminus(
VALUE str)
3051 str = rb_str_dup(str);
3053 return rb_fstring(str);
3057#define rb_str_dup_frozen rb_str_new_frozen
3062 if (
FL_TEST(str, STR_TMPLOCK)) {
3065 FL_SET(str, STR_TMPLOCK);
3072 if (!
FL_TEST(str, STR_TMPLOCK)) {
3079RUBY_FUNC_EXPORTED
VALUE
3090 const int termlen = TERM_LEN(str);
3092 str_modifiable(str);
3093 if (STR_SHARED_P(str)) {
3096 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3097 rb_bug(
"probable buffer overflow: %ld for %ld",
len,
capa);
3104 else if (
len > RSTRING_LEN(str)) {
3107 const char *
const prev_end = RSTRING_END(str);
3108 const char *
const new_end = RSTRING_PTR(str) +
len;
3118 else if (
len < RSTRING_LEN(str)) {
3126 STR_SET_LEN(str,
len);
3127 TERM_FILL(&RSTRING_PTR(str)[
len], termlen);
3134 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3137 int independent = str_independent(str);
3138 long slen = RSTRING_LEN(str);
3146 const int termlen = TERM_LEN(str);
3147 if (STR_EMBED_P(str)) {
3148 if (
len == slen)
return str;
3149 if (str_embed_capa(str) >=
len + termlen) {
3150 STR_SET_LEN(str,
len);
3154 str_make_independent_expand(str, slen,
len - slen, termlen);
3156 else if (str_embed_capa(str) >=
len + termlen) {
3157 char *
ptr = STR_HEAP_PTR(str);
3159 if (slen >
len) slen =
len;
3162 STR_SET_LEN(str,
len);
3163 if (independent) ruby_xfree(
ptr);
3166 else if (!independent) {
3167 if (
len == slen)
return str;
3168 str_make_independent_expand(str, slen,
len - slen, termlen);
3172 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3173 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3176 else if (
len == slen)
return str;
3177 STR_SET_LEN(str,
len);
3184str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3187 str_modify_keep_cr(str);
3192 if (
len == 0)
return 0;
3194 long total, olen,
off = -1;
3196 const int termlen = TERM_LEN(str);
3199 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3203 long capa = str_capacity(str, termlen);
3205 if (olen > LONG_MAX -
len) {
3206 rb_raise(rb_eArgError,
"string sizes too big");
3210 if (total >= LONG_MAX / 2) {
3213 while (total >
capa) {
3216 RESIZE_CAPA_TERM(str,
capa, termlen);
3217 sptr = RSTRING_PTR(str);
3222 memcpy(sptr + olen,
ptr,
len);
3223 STR_SET_LEN(str, total);
3224 TERM_FILL(sptr + total, termlen);
3229#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
3230#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3235 if (
len == 0)
return str;
3237 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3239 return str_buf_cat(str,
ptr,
len);
3254rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3255 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3264 if (str_encindex == ptr_encindex) {
3266 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3270 str_enc = rb_enc_from_index(str_encindex);
3271 ptr_enc = rb_enc_from_index(ptr_encindex);
3272 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
3275 if (RSTRING_LEN(str) == 0) {
3284 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3288 str_cr = rb_enc_str_coderange(str);
3293 *ptr_cr_ret = ptr_cr;
3295 if (str_encindex != ptr_encindex &&
3298 str_enc = rb_enc_from_index(str_encindex);
3299 ptr_enc = rb_enc_from_index(ptr_encindex);
3304 res_encindex = str_encindex;
3309 res_encindex = str_encindex;
3313 res_encindex = ptr_encindex;
3318 res_encindex = str_encindex;
3325 res_encindex = str_encindex;
3331 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3333 str_buf_cat(str,
ptr,
len);
3339 rb_enc_name(str_enc), rb_enc_name(ptr_enc));
3346 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3356 if (rb_enc_asciicompat(enc)) {
3357 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3363 unsigned int c = (
unsigned char)*
ptr;
3364 int len = rb_enc_codelen(c, enc);
3365 rb_enc_mbcput(c, buf, enc);
3366 rb_enc_cr_str_buf_cat(str, buf,
len,
3377 int str2_cr = rb_enc_str_coderange(str2);
3379 if (str_enc_fastpath(str)) {
3383 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3389 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3400 rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
3412 return rb_str_buf_append(str, str2);
3416rb_str_concat_literals(
size_t num,
const VALUE *strary)
3420 unsigned long len = 1;
3425 for (i = 0; i < num; ++i) {
len += RSTRING_LEN(strary[i]); }
3426 str = rb_str_buf_new(
len);
3427 str_enc_copy_direct(str, strary[0]);
3429 for (i = s; i < num; ++i) {
3430 const VALUE v = strary[i];
3433 rb_str_buf_append(str, v);
3434 if (encidx != ENCINDEX_US_ASCII) {
3436 rb_enc_set_index(str, encidx);
3461rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3463 str_modifiable(str);
3468 else if (argc > 1) {
3471 rb_enc_copy(arg_str, str);
3472 for (i = 0; i < argc; i++) {
3475 rb_str_buf_append(str, arg_str);
3507 if (rb_num_to_uint(str2, &code) == 0) {
3520 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
3523 buf[0] = (char)code;
3524 rb_str_cat(str1, buf, 1);
3525 if (encidx != rb_enc_to_index(enc)) {
3526 rb_enc_associate_index(str1, encidx);
3531 long pos = RSTRING_LEN(str1);
3536 switch (
len = rb_enc_codelen(code, enc)) {
3537 case ONIGERR_INVALID_CODE_POINT_VALUE:
3538 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
3540 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
3546 rb_enc_mbcput(code, buf, enc);
3547 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
3548 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
3550 rb_str_resize(str1, pos+
len);
3551 memcpy(RSTRING_PTR(str1) + pos, buf,
len);
3564rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
3566 int encidx = rb_enc_to_index(enc);
3568 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
3573 if (encidx == ENCINDEX_US_ASCII && code > 127) {
3574 return ENCINDEX_ASCII_8BIT;
3597rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
3599 str_modifiable(str);
3604 else if (argc > 1) {
3607 rb_enc_copy(arg_str, str);
3608 for (i = 0; i < argc; i++) {
3620 st_index_t h =
rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str));
3622 if (e && !is_ascii_string(str)) {
3632 const char *ptr1, *ptr2;
3635 return (len1 != len2 ||
3637 memcmp(ptr1, ptr2, len1) != 0);
3651rb_str_hash_m(
VALUE str)
3657#define lesser(a,b) (((a)>(b))?(b):(a))
3665 if (RSTRING_LEN(str1) == 0)
return TRUE;
3666 if (RSTRING_LEN(str2) == 0)
return TRUE;
3669 if (idx1 == idx2)
return TRUE;
3670 rc1 = rb_enc_str_coderange(str1);
3671 rc2 = rb_enc_str_coderange(str2);
3674 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
3678 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
3688 const char *ptr1, *ptr2;
3691 if (str1 == str2)
return 0;
3694 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
3703 if (len1 > len2)
return 1;
3706 if (retval > 0)
return 1;
3733 if (str1 == str2)
return Qtrue;
3740 return rb_str_eql_internal(str1, str2);
3764 if (str1 == str2)
return Qtrue;
3766 return rb_str_eql_internal(str1, str2);
3797 return rb_invcmp(str1, str2);
3839 return str_casecmp(str1, s);
3847 const char *p1, *p1end, *p2, *p2end;
3849 enc = rb_enc_compatible(str1, str2);
3854 p1 = RSTRING_PTR(str1); p1end = RSTRING_END(str1);
3855 p2 = RSTRING_PTR(str2); p2end = RSTRING_END(str2);
3856 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
3857 while (p1 < p1end && p2 < p2end) {
3859 unsigned int c1 =
TOLOWER(*p1 & 0xff);
3860 unsigned int c2 =
TOLOWER(*p2 & 0xff);
3862 return INT2FIX(c1 < c2 ? -1 : 1);
3869 while (p1 < p1end && p2 < p2end) {
3870 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
3871 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
3873 if (0 <= c1 && 0 <= c2) {
3877 return INT2FIX(c1 < c2 ? -1 : 1);
3881 l1 = rb_enc_mbclen(p1, p1end, enc);
3882 l2 = rb_enc_mbclen(p2, p2end, enc);
3883 len = l1 < l2 ? l1 : l2;
3884 r = memcmp(p1, p2,
len);
3886 return INT2FIX(r < 0 ? -1 : 1);
3888 return INT2FIX(l1 < l2 ? -1 : 1);
3894 if (RSTRING_LEN(str1) == RSTRING_LEN(str2))
return INT2FIX(0);
3895 if (RSTRING_LEN(str1) > RSTRING_LEN(str2))
return INT2FIX(1);
3929 return str_casecmp_p(str1, s);
3936 VALUE folded_str1, folded_str2;
3937 VALUE fold_opt = sym_fold;
3939 enc = rb_enc_compatible(str1, str2);
3944 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
3945 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
3947 return rb_str_eql(folded_str1, folded_str2);
3951strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
3952 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
3954 const char *search_start = str_ptr;
3955 long pos, search_len = str_len - offset;
3959 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
3960 if (pos < 0)
return pos;
3962 if (t == search_start + pos)
break;
3963 search_len -= t - search_start;
3964 if (search_len <= 0)
return -1;
3965 offset += t - search_start;
3968 return pos + offset;
3972#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
3973#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
3976rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
3978 const char *str_ptr, *str_ptr_end, *sub_ptr;
3979 long str_len, sub_len;
3982 enc = rb_enc_check(str, sub);
3983 if (is_broken_string(sub))
return -1;
3985 str_ptr = RSTRING_PTR(str);
3986 str_ptr_end = RSTRING_END(str);
3987 str_len = RSTRING_LEN(str);
3988 sub_ptr = RSTRING_PTR(sub);
3989 sub_len = RSTRING_LEN(sub);
3991 if (str_len < sub_len)
return -1;
3994 long str_len_char, sub_len_char;
3995 int single_byte = single_byte_optimizable(str);
3996 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
3997 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
3999 offset += str_len_char;
4000 if (offset < 0)
return -1;
4002 if (str_len_char - offset < sub_len_char)
return -1;
4003 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4006 if (sub_len == 0)
return offset;
4009 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
4023rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4030 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4031 long slen = str_strlen(str, enc);
4033 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4045 pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
4046 enc, single_byte_optimizable(str));
4057 pos = rb_str_index(str, sub, pos);
4071str_ensure_byte_pos(
VALUE str,
long pos)
4073 const char *s = RSTRING_PTR(str);
4074 const char *e = RSTRING_END(str);
4075 const char *p = s + pos;
4076 if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
4078 "offset %ld does not land on character boundary", pos);
4124rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4130 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4131 long slen = RSTRING_LEN(str);
4133 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4144 str_ensure_byte_pos(str, pos);
4156 pos = rb_str_byteindex(str, sub, pos);
4157 if (pos >= 0)
return LONG2NUM(pos);
4166 char *hit, *adjusted;
4168 long slen, searchlen;
4171 sbeg = RSTRING_PTR(str);
4172 slen = RSTRING_LEN(sub);
4173 if (slen == 0)
return s - sbeg;
4174 e = RSTRING_END(str);
4175 t = RSTRING_PTR(sub);
4177 searchlen = s - sbeg + 1;
4180 hit = memrchr(sbeg, c, searchlen);
4183 if (hit != adjusted) {
4184 searchlen = adjusted - sbeg;
4187 if (memcmp(hit, t, slen) == 0)
4189 searchlen = adjusted - sbeg;
4190 }
while (searchlen > 0);
4201 sbeg = RSTRING_PTR(str);
4202 e = RSTRING_END(str);
4203 t = RSTRING_PTR(sub);
4204 slen = RSTRING_LEN(sub);
4207 if (memcmp(s, t, slen) == 0) {
4210 if (s <= sbeg)
break;
4211 s = rb_enc_prev_char(sbeg, s, e, enc);
4227 enc = rb_enc_check(str, sub);
4228 if (is_broken_string(sub))
return -1;
4229 singlebyte = single_byte_optimizable(str);
4230 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
4231 slen = str_strlen(sub, enc);
4234 if (
len < slen)
return -1;
4235 if (
len - pos < slen) pos =
len - slen;
4236 if (
len == 0)
return pos;
4238 sbeg = RSTRING_PTR(str);
4241 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4247 s = str_nth(sbeg, RSTRING_END(str), pos, enc, singlebyte);
4248 return str_rindex(str, sub, s, enc);
4309rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4314 long pos,
len = str_strlen(str, enc);
4316 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4318 if (pos < 0 && (pos +=
len) < 0) {
4324 if (pos >
len) pos =
len;
4332 pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
4333 enc, single_byte_optimizable(str));
4344 pos = rb_str_rindex(str, sub, pos);
4354rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4360 enc = rb_enc_check(str, sub);
4361 if (is_broken_string(sub))
return -1;
4362 len = RSTRING_LEN(str);
4363 slen = RSTRING_LEN(sub);
4366 if (
len < slen)
return -1;
4367 if (
len - pos < slen) pos =
len - slen;
4368 if (
len == 0)
return pos;
4370 sbeg = RSTRING_PTR(str);
4373 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4380 return str_rindex(str, sub, s, enc);
4445rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4449 long pos,
len = RSTRING_LEN(str);
4451 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4453 if (pos < 0 && (pos +=
len) < 0) {
4459 if (pos >
len) pos =
len;
4465 str_ensure_byte_pos(str, pos);
4477 pos = rb_str_byterindex(str, sub, pos);
4478 if (pos >= 0)
return LONG2NUM(pos);
4514 switch (OBJ_BUILTIN_TYPE(y)) {
4566rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
4573 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
4605rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
4609 re = get_pat(argv[0]);
4610 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
4619static enum neighbor_char
4627 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
4629 return NEIGHBOR_NOT_CHAR;
4633 if (!l)
return NEIGHBOR_NOT_CHAR;
4634 if (l !=
len)
return NEIGHBOR_WRAPPED;
4635 rb_enc_mbcput(c, p, enc);
4636 r = rb_enc_precise_mbclen(p, p +
len, enc);
4638 return NEIGHBOR_NOT_CHAR;
4640 return NEIGHBOR_FOUND;
4643 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
4646 return NEIGHBOR_WRAPPED;
4647 ++((
unsigned char*)p)[i];
4648 l = rb_enc_precise_mbclen(p, p+
len, enc);
4652 return NEIGHBOR_FOUND;
4655 memset(p+l, 0xff,
len-l);
4661 for (len2 =
len-1; 0 < len2; len2--) {
4662 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
4666 memset(p+len2+1, 0xff,
len-(len2+1));
4671static enum neighbor_char
4678 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
4680 return NEIGHBOR_NOT_CHAR;
4683 if (!c)
return NEIGHBOR_NOT_CHAR;
4686 if (!l)
return NEIGHBOR_NOT_CHAR;
4687 if (l !=
len)
return NEIGHBOR_WRAPPED;
4688 rb_enc_mbcput(c, p, enc);
4689 r = rb_enc_precise_mbclen(p, p +
len, enc);
4691 return NEIGHBOR_NOT_CHAR;
4693 return NEIGHBOR_FOUND;
4696 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
4699 return NEIGHBOR_WRAPPED;
4700 --((
unsigned char*)p)[i];
4701 l = rb_enc_precise_mbclen(p, p+
len, enc);
4705 return NEIGHBOR_FOUND;
4708 memset(p+l, 0,
len-l);
4714 for (len2 =
len-1; 0 < len2; len2--) {
4715 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
4719 memset(p+len2+1, 0,
len-(len2+1));
4733static enum neighbor_char
4734enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
4736 enum neighbor_char ret;
4740 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
4744 const int max_gaps = 1;
4748 ctype = ONIGENC_CTYPE_DIGIT;
4750 ctype = ONIGENC_CTYPE_ALPHA;
4752 return NEIGHBOR_NOT_CHAR;
4755 for (
try = 0;
try <= max_gaps; ++
try) {
4756 ret = enc_succ_char(p,
len, enc);
4757 if (ret == NEIGHBOR_FOUND) {
4760 return NEIGHBOR_FOUND;
4767 ret = enc_pred_char(p,
len, enc);
4768 if (ret == NEIGHBOR_FOUND) {
4782 return NEIGHBOR_NOT_CHAR;
4785 if (ctype != ONIGENC_CTYPE_DIGIT) {
4787 return NEIGHBOR_WRAPPED;
4791 enc_succ_char(carry,
len, enc);
4792 return NEIGHBOR_WRAPPED;
4860 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
4861 rb_enc_cr_str_copy_for_substr(str, orig);
4862 return str_succ(str);
4869 char *sbeg, *s, *e, *last_alnum = 0;
4870 int found_alnum = 0;
4872 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
4873 long carry_pos = 0, carry_len = 1;
4874 enum neighbor_char neighbor = NEIGHBOR_FOUND;
4876 slen = RSTRING_LEN(str);
4877 if (slen == 0)
return str;
4879 enc = STR_ENC_GET(str);
4880 sbeg = RSTRING_PTR(str);
4881 s = e = sbeg + slen;
4883 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
4884 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
4890 l = rb_enc_precise_mbclen(s, e, enc);
4891 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
4892 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
4893 neighbor = enc_succ_alnum_char(s, l, enc, carry);
4895 case NEIGHBOR_NOT_CHAR:
4897 case NEIGHBOR_FOUND:
4899 case NEIGHBOR_WRAPPED:
4904 carry_pos = s - sbeg;
4909 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
4910 enum neighbor_char neighbor;
4911 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
4912 l = rb_enc_precise_mbclen(s, e, enc);
4913 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
4914 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
4916 neighbor = enc_succ_char(tmp, l, enc);
4918 case NEIGHBOR_FOUND:
4922 case NEIGHBOR_WRAPPED:
4925 case NEIGHBOR_NOT_CHAR:
4928 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
4930 enc_succ_char(s, l, enc);
4932 if (!rb_enc_asciicompat(enc)) {
4933 MEMCPY(carry, s,
char, l);
4936 carry_pos = s - sbeg;
4940 RESIZE_CAPA(str, slen + carry_len);
4941 sbeg = RSTRING_PTR(str);
4942 s = sbeg + carry_pos;
4943 memmove(s + carry_len, s, slen - carry_pos);
4944 memmove(s, carry, carry_len);
4946 STR_SET_LEN(str, slen);
4948 rb_enc_str_coderange(str);
4961rb_str_succ_bang(
VALUE str)
4969all_digits_p(
const char *s,
long len)
5023 VALUE end, exclusive;
5027 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5033 VALUE current, after_end;
5040 enc = rb_enc_check(beg, end);
5041 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5043 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
5044 char c = RSTRING_PTR(beg)[0];
5045 char e = RSTRING_PTR(end)[0];
5047 if (c > e || (excl && c == e))
return beg;
5049 if ((*each)(rb_enc_str_new(&c, 1, enc), arg))
break;
5050 if (!excl && c == e)
break;
5052 if (excl && c == e)
break;
5057 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
5058 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
5059 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5064 b = rb_str_to_inum(beg, 10, FALSE);
5065 e = rb_str_to_inum(end, 10, FALSE);
5072 if (excl && bi == ei)
break;
5073 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5078 ID op = excl ?
'<' : idLE;
5079 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5084 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5085 b = rb_funcallv(b, succ, 0, 0);
5092 if (n > 0 || (excl && n == 0))
return beg;
5094 after_end = rb_funcallv(end, succ, 0, 0);
5099 next = rb_funcallv(current, succ, 0, 0);
5100 if ((*each)(current, arg))
break;
5101 if (
NIL_P(next))
break;
5105 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5120 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5121 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5122 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5124 b = rb_str_to_inum(beg, 10, FALSE);
5130 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5138 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5139 b = rb_funcallv(b, succ, 0, 0);
5145 VALUE next = rb_funcallv(current, succ, 0, 0);
5146 if ((*each)(current, arg))
break;
5149 if (RSTRING_LEN(current) == 0)
5160 if (!
rb_equal(str, *argp))
return 0;
5168 beg = rb_str_new_frozen(beg);
5170 end = rb_str_new_frozen(end);
5174 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
5175 rb_enc_asciicompat(STR_ENC_GET(end)) &&
5176 rb_enc_asciicompat(STR_ENC_GET(val))) {
5177 const char *bp = RSTRING_PTR(beg);
5178 const char *ep = RSTRING_PTR(end);
5179 const char *vp = RSTRING_PTR(val);
5180 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
5181 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
5189 if (b <= v && v < e)
return Qtrue;
5190 return RBOOL(!
RTEST(exclusive) && v == e);
5197 all_digits_p(bp, RSTRING_LEN(beg)) &&
5198 all_digits_p(ep, RSTRING_LEN(end))) {
5203 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5205 return RBOOL(
NIL_P(val));
5227 else if (RB_TYPE_P(indx,
T_REGEXP)) {
5228 return rb_str_subpat(str, indx,
INT2FIX(0));
5230 else if (RB_TYPE_P(indx,
T_STRING)) {
5231 if (rb_str_index(str, indx, 0) != -1)
5237 long beg,
len = str_strlen(str, NULL);
5249 return str_substr(str, idx, 1, FALSE);
5268rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5271 if (RB_TYPE_P(argv[0],
T_REGEXP)) {
5272 return rb_str_subpat(str, argv[0], argv[1]);
5281 return rb_str_aref(str, argv[0]);
5287 char *ptr = RSTRING_PTR(str);
5288 long olen = RSTRING_LEN(str), nlen;
5290 str_modifiable(str);
5291 if (
len > olen)
len = olen;
5293 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5295 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5297 ptr =
RSTRING(str)->as.embed.ary;
5298 memmove(ptr, oldptr +
len, nlen);
5299 if (fl == STR_NOEMBED)
xfree(oldptr);
5302 if (!STR_SHARED_P(str)) {
5304 rb_enc_cr_str_exact_copy(shared, str);
5309 STR_SET_LEN(str, nlen);
5311 if (!SHARABLE_MIDDLE_SUBSTRING) {
5312 TERM_FILL(ptr + nlen, TERM_LEN(str));
5319rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
5325 if (beg == 0 && vlen == 0) {
5330 str_modify_keep_cr(str);
5334 RESIZE_CAPA(str, slen + vlen -
len);
5335 sptr = RSTRING_PTR(str);
5339 cr = rb_enc_str_coderange(val);
5344 memmove(sptr + beg + vlen,
5346 slen - (beg +
len));
5348 if (vlen < beg &&
len < 0) {
5352 memmove(sptr + beg, RSTRING_PTR(val) + vbeg, vlen);
5355 STR_SET_LEN(str, slen);
5356 TERM_FILL(&sptr[slen], TERM_LEN(str));
5363 rb_str_update_1(str, beg,
len, val, 0, RSTRING_LEN(val));
5372 int singlebyte = single_byte_optimizable(str);
5378 enc = rb_enc_check(str, val);
5379 slen = str_strlen(str, enc);
5381 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5388 assert(beg <= slen);
5389 if (
len > slen - beg) {
5392 p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc, singlebyte);
5393 if (!p) p = RSTRING_END(str);
5394 e = str_nth(p, RSTRING_END(str),
len, enc, singlebyte);
5395 if (!e) e = RSTRING_END(str);
5397 beg = p - RSTRING_PTR(str);
5399 rb_str_update_0(str, beg,
len, val);
5400 rb_enc_associate(str, enc);
5411 long start, end,
len;
5421 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5425 nth += regs->num_regs;
5435 enc = rb_enc_check_str(str, val);
5436 rb_str_update_0(str, start,
len, val);
5437 rb_enc_associate(str, enc);
5445 switch (
TYPE(indx)) {
5447 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5451 beg = rb_str_index(str, indx, 0);
5505rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5508 if (RB_TYPE_P(argv[0],
T_REGEXP)) {
5509 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5517 return rb_str_aset(str, argv[0], argv[1]);
5577rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
5585 str_modify_keep_cr(str);
5593 if ((nth += regs->num_regs) <= 0)
return Qnil;
5595 else if (nth >= regs->num_regs)
return Qnil;
5597 len = END(nth) - beg;
5600 else if (argc == 2) {
5609 beg = p - RSTRING_PTR(str);
5612 else if (RB_TYPE_P(indx,
T_STRING)) {
5613 beg = rb_str_index(str, indx, 0);
5614 if (beg == -1)
return Qnil;
5615 len = RSTRING_LEN(indx);
5627 beg = p - RSTRING_PTR(str);
5636 beg = p - RSTRING_PTR(str);
5640 rb_enc_cr_str_copy_for_substr(result, str);
5648 char *sptr = RSTRING_PTR(str);
5649 long slen = RSTRING_LEN(str);
5650 if (beg +
len > slen)
5654 slen - (beg +
len));
5656 STR_SET_LEN(str, slen);
5657 TERM_FILL(&sptr[slen], TERM_LEN(str));
5668 switch (OBJ_BUILTIN_TYPE(pat)) {
5687get_pat_quoted(
VALUE pat,
int check)
5691 switch (OBJ_BUILTIN_TYPE(pat)) {
5705 if (check && is_broken_string(pat)) {
5706 rb_exc_raise(rb_reg_check_preprocess(pat));
5712rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
5715 pos = rb_str_byteindex(str, pat, pos);
5716 if (set_backref_str) {
5718 str = rb_str_new_frozen_String(str);
5719 rb_backref_set_string(str, pos, RSTRING_LEN(pat));
5728 return rb_reg_search0(pat, str, pos, 0, set_backref_str);
5748rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
5762 hash = rb_check_hash_type(argv[1]);
5768 pat = get_pat_quoted(argv[0], 1);
5770 str_modifiable(str);
5771 beg = rb_pat_search(pat, str, 0, 1);
5785 end0 = beg0 + RSTRING_LEN(pat);
5794 if (iter || !
NIL_P(hash)) {
5795 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
5798 repl = rb_obj_as_string(
rb_yield(match0));
5801 repl = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
5802 repl = rb_obj_as_string(repl);
5804 str_mod_check(str, p,
len);
5811 enc = rb_enc_compatible(str, repl);
5814 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
5818 rb_enc_name(str_enc),
5819 rb_enc_name(STR_ENC_GET(repl)));
5821 enc = STR_ENC_GET(repl);
5824 rb_enc_associate(str, enc);
5834 rlen = RSTRING_LEN(repl);
5835 len = RSTRING_LEN(str);
5837 RESIZE_CAPA(str,
len + rlen - plen);
5839 p = RSTRING_PTR(str);
5841 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
5843 rp = RSTRING_PTR(repl);
5844 memmove(p + beg0, rp, rlen);
5846 STR_SET_LEN(str,
len);
5847 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
5876 rb_str_sub_bang(argc, argv, str);
5881str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
5884 long beg, beg0, end0;
5885 long offset, blen, slen,
len, last;
5886 enum {STR, ITER, MAP} mode = STR;
5888 int need_backref = -1;
5898 hash = rb_check_hash_type(argv[1]);
5907 rb_error_arity(argc, 1, 2);
5910 pat = get_pat_quoted(argv[0], 1);
5911 beg = rb_pat_search(pat, str, 0, need_backref);
5913 if (bang)
return Qnil;
5918 blen = RSTRING_LEN(str) + 30;
5919 dest = rb_str_buf_new(blen);
5920 sp = RSTRING_PTR(str);
5921 slen = RSTRING_LEN(str);
5923 str_enc = STR_ENC_GET(str);
5924 rb_enc_associate(dest, str_enc);
5932 end0 = beg0 + RSTRING_LEN(pat);
5943 val = rb_obj_as_string(
rb_yield(match0));
5946 val = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
5947 val = rb_obj_as_string(val);
5949 str_mod_check(str, sp, slen);
5954 else if (need_backref) {
5956 if (need_backref < 0) {
5957 need_backref = val != repl;
5964 len = beg0 - offset;
5966 rb_enc_str_buf_cat(dest, cp,
len, str_enc);
5969 rb_str_buf_append(dest, val);
5978 if (RSTRING_LEN(str) <= end0)
break;
5979 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0, RSTRING_END(str), str_enc);
5980 rb_enc_str_buf_cat(dest, RSTRING_PTR(str)+end0,
len, str_enc);
5981 offset = end0 +
len;
5983 cp = RSTRING_PTR(str) + offset;
5984 if (offset > RSTRING_LEN(str))
break;
5985 beg = rb_pat_search(pat, str, offset, need_backref);
5989 if (RSTRING_LEN(str) > offset) {
5990 rb_enc_str_buf_cat(dest, cp, RSTRING_LEN(str) - offset, str_enc);
5992 rb_pat_search(pat, str, last, 1);
5994 str_shared_replace(str, dest);
6022rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6024 str_modify_keep_cr(str);
6025 return str_gsub(argc, argv, str, 1);
6048 return str_gsub(argc, argv, str, 0);
6066 str_modifiable(str);
6067 if (str == str2)
return str;
6071 return str_replace(str, str2);
6086rb_str_clear(
VALUE str)
6090 STR_SET_LEN(str, 0);
6091 RSTRING_PTR(str)[0] = 0;
6092 if (rb_enc_asciicompat(STR_ENC_GET(str)))
6111rb_str_chr(
VALUE str)
6135 pos += RSTRING_LEN(str);
6136 if (pos < 0 || RSTRING_LEN(str) <= pos)
6139 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
6158 long len = RSTRING_LEN(str);
6159 char *ptr, *head, *left = 0;
6163 if (pos < -
len ||
len <= pos)
6170 char byte = (char)(
NUM2INT(w) & 0xFF);
6172 if (!str_independent(str))
6173 str_make_independent(str);
6174 enc = STR_ENC_GET(str);
6175 head = RSTRING_PTR(str);
6177 if (!STR_EMBED_P(str)) {
6184 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6192 width = rb_enc_precise_mbclen(left, head+
len, enc);
6194 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6210str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6212 long n = RSTRING_LEN(str);
6214 if (beg > n ||
len < 0)
return Qnil;
6217 if (beg < 0)
return Qnil;
6222 if (!empty)
return Qnil;
6226 VALUE str2 = str_subseq(str, beg,
len);
6228 str_enc_copy_direct(str2, str);
6230 if (RSTRING_LEN(str2) == 0) {
6231 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
6259 long beg,
len = RSTRING_LEN(str);
6267 return str_byte_substr(str, beg,
len, TRUE);
6272 return str_byte_substr(str, idx, 1, FALSE);
6319rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6324 return str_byte_substr(str, beg,
len, TRUE);
6327 return str_byte_aref(str, argv[0]);
6331str_check_beg_len(
VALUE str,
long *beg,
long *
len)
6333 long end, slen = RSTRING_LEN(str);
6336 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
6343 assert(*beg <= slen);
6344 if (*
len > slen - *beg) {
6348 str_ensure_byte_pos(str, *beg);
6349 str_ensure_byte_pos(str, end);
6374rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6376 long beg,
len, vbeg, vlen;
6382 if (!(argc == 2 || argc == 3 || argc == 5)) {
6383 rb_raise(rb_eArgError,
"wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
6387 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6388 rb_builtin_class_name(argv[0]));
6395 vlen = RSTRING_LEN(val);
6400 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6401 rb_builtin_class_name(argv[2]));
6413 vlen = RSTRING_LEN(val);
6421 str_check_beg_len(str, &beg, &
len);
6422 str_check_beg_len(val, &vbeg, &vlen);
6423 enc = rb_enc_check(str, val);
6424 str_modify_keep_cr(str);
6425 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
6426 rb_enc_associate(str, enc);
6444rb_str_reverse(
VALUE str)
6451 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
6452 enc = STR_ENC_GET(str);
6454 s = RSTRING_PTR(str); e = RSTRING_END(str);
6455 p = RSTRING_END(rev);
6458 if (RSTRING_LEN(str) > 1) {
6459 if (single_byte_optimizable(str)) {
6466 int clen = rb_enc_fast_mbclen(s, e, enc);
6474 cr = rb_enc_asciicompat(enc) ?
6477 int clen = rb_enc_mbclen(s, e, enc);
6486 STR_SET_LEN(rev, RSTRING_LEN(str));
6487 str_enc_copy_direct(rev, str);
6507rb_str_reverse_bang(
VALUE str)
6509 if (RSTRING_LEN(str) > 1) {
6510 if (single_byte_optimizable(str)) {
6513 str_modify_keep_cr(str);
6514 s = RSTRING_PTR(str);
6515 e = RSTRING_END(str) - 1;
6523 str_shared_replace(str, rb_str_reverse(str));
6527 str_modify_keep_cr(str);
6552 i = rb_str_index(str, arg, 0);
6554 return RBOOL(i != -1);
6596 rb_raise(rb_eArgError,
"invalid radix %d", base);
6598 return rb_str_to_inum(str, base, FALSE);
6622rb_str_to_f(
VALUE str)
6637rb_str_to_s(
VALUE str)
6649 char s[RUBY_MAX_CHAR_LEN];
6650 int n = rb_enc_codelen(c, enc);
6652 rb_enc_mbcput(c, s, enc);
6653 rb_enc_str_buf_cat(str, s, n, enc);
/* Size bound handed to snprintf() when a single character is formatted as an
 * escape sequence; the destination buffers that use it are declared with
 * CHAR_ESC_LEN + 1 bytes.
 * NOTE(review): 13 presumably covers the longest brace form (prefix, eight hex
 * digits of a 32-bit value, closing brace, NUL) -- confirm. */
6657#define CHAR_ESC_LEN 13
6660rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
6662 char buf[CHAR_ESC_LEN + 1];
6670 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
6672 else if (c < 0x10000) {
6673 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
6676 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
6681 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
6684 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
6687 l = (int)strlen(buf);
6693ruby_escaped_char(
int c)
6696 case '\0':
return "\\0";
6697 case '\n':
return "\\n";
6698 case '\r':
return "\\r";
6699 case '\t':
return "\\t";
6700 case '\f':
return "\\f";
6701 case '\013':
return "\\v";
6702 case '\010':
return "\\b";
6703 case '\007':
return "\\a";
6704 case '\033':
return "\\e";
6705 case '\x7f':
return "\\c?";
6711rb_str_escape(
VALUE str)
6715 const char *p = RSTRING_PTR(str);
6716 const char *pend = RSTRING_END(str);
6717 const char *prev = p;
6718 char buf[CHAR_ESC_LEN + 1];
6719 VALUE result = rb_str_buf_new(0);
6720 int unicode_p = rb_enc_unicode_p(enc);
6721 int asciicompat = rb_enc_asciicompat(enc);
6726 int n = rb_enc_precise_mbclen(p, pend, enc);
6728 if (p > prev) str_buf_cat(result, prev, p - prev);
6731 n = (int)(pend - p);
6733 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
6734 str_buf_cat(result, buf, strlen(buf));
6742 cc = ruby_escaped_char(c);
6744 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6745 str_buf_cat(result, cc, strlen(cc));
6751 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6752 rb_str_buf_cat_escaped_char(result, c, unicode_p);
6756 if (p > prev) str_buf_cat(result, prev, p - prev);
6780 const char *p, *pend, *prev;
6781 char buf[CHAR_ESC_LEN + 1];
6782 VALUE result = rb_str_buf_new(0);
6783 rb_encoding *resenc = rb_default_internal_encoding();
6784 int unicode_p = rb_enc_unicode_p(enc);
6785 int asciicompat = rb_enc_asciicompat(enc);
6787 if (resenc == NULL) resenc = rb_default_external_encoding();
6788 if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
6789 rb_enc_associate(result, resenc);
6790 str_buf_cat2(result,
"\"");
6792 p = RSTRING_PTR(str); pend = RSTRING_END(str);
6798 n = rb_enc_precise_mbclen(p, pend, enc);
6800 if (p > prev) str_buf_cat(result, prev, p - prev);
6803 n = (int)(pend - p);
6805 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
6806 str_buf_cat(result, buf, strlen(buf));
6814 if ((asciicompat || unicode_p) &&
6815 (c ==
'"'|| c ==
'\\' ||
6820 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
6821 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6822 str_buf_cat2(result,
"\\");
6823 if (asciicompat || enc == resenc) {
6829 case '\n': cc =
'n';
break;
6830 case '\r': cc =
'r';
break;
6831 case '\t': cc =
't';
break;
6832 case '\f': cc =
'f';
break;
6833 case '\013': cc =
'v';
break;
6834 case '\010': cc =
'b';
break;
6835 case '\007': cc =
'a';
break;
6836 case 033: cc =
'e';
break;
6837 default: cc = 0;
break;
6840 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6843 str_buf_cat(result, buf, 2);
6860 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6861 rb_str_buf_cat_escaped_char(result, c, unicode_p);
6866 if (p > prev) str_buf_cat(result, prev, p - prev);
6867 str_buf_cat2(result,
"\"");
/* True when p is still before e and the byte at p is '$', '@' or '{'.
 * The dump code applies it to the position right after a '#' to decide
 * whether a backslash must be emitted in front of that '#'.
 * p is evaluated several times, so pass an expression without side effects. */
6872#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
6892 int encidx = rb_enc_get_index(str);
6895 const char *p, *pend;
6898 int u8 = (encidx == rb_utf8_encindex());
6899 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
6902 if (!rb_enc_asciicompat(enc)) {
6904 len += strlen(enc->name);
6907 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
6910 unsigned char c = *p++;
6913 case '"':
case '\\':
6914 case '\n':
case '\r':
6915 case '\t':
case '\f':
6916 case '\013':
case '\010':
case '\007':
case '\033':
6921 clen = IS_EVSTR(p, pend) ? 2 : 1;
6929 if (u8 && c > 0x7F) {
6930 int n = rb_enc_precise_mbclen(p-1, pend, enc);
6935 else if (cc <= 0xFFFFF)
6948 if (clen > LONG_MAX -
len) {
6955 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
6956 q = RSTRING_PTR(result); qend = q +
len + 1;
6960 unsigned char c = *p++;
6962 if (c ==
'"' || c ==
'\\') {
6966 else if (c ==
'#') {
6967 if (IS_EVSTR(p, pend)) *q++ =
'\\';
6970 else if (c ==
'\n') {
6974 else if (c ==
'\r') {
6978 else if (c ==
'\t') {
6982 else if (c ==
'\f') {
6986 else if (c ==
'\013') {
6990 else if (c ==
'\010') {
6994 else if (c ==
'\007') {
6998 else if (c ==
'\033') {
7008 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
7013 snprintf(q, qend-q,
"u%04X", cc);
7015 snprintf(q, qend-q,
"u{%X}", cc);
7020 snprintf(q, qend-q,
"x%02X", c);
7026 if (!rb_enc_asciicompat(enc)) {
7027 snprintf(q, qend-q, nonascii_suffix, enc->name);
7028 encidx = rb_ascii8bit_encindex();
7031 rb_enc_associate_index(result, encidx);
7037unescape_ascii(
unsigned int c)
7061undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
7063 const char *s = *ss;
7067 unsigned char buf[6];
7074 rb_str_cat(undumped, s, 1);
7085 *buf = unescape_ascii(*s);
7086 rb_str_cat(undumped, (
char *)buf, 1);
7097 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
7098 if (*penc != enc_utf8) {
7100 rb_enc_associate(undumped, enc_utf8);
7117 if (hexlen == 0 || hexlen > 6) {
7123 if (0xd800 <= c && c <= 0xdfff) {
7126 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7127 rb_str_cat(undumped, (
char *)buf, codelen);
7136 if (0xd800 <= c && c <= 0xdfff) {
7139 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7140 rb_str_cat(undumped, (
char *)buf, codelen);
7156 rb_str_cat(undumped, (
char *)buf, 1);
7160 rb_str_cat(undumped, s-1, 2);
7167static VALUE rb_str_is_ascii_only_p(
VALUE str);
7185str_undump(
VALUE str)
7187 const char *s = RSTRING_PTR(str);
7188 const char *s_end = RSTRING_END(str);
7190 VALUE undumped = rb_enc_str_new(s, 0L, enc);
7192 bool binary =
false;
7196 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7199 if (!str_null_check(str, &w)) {
7202 if (RSTRING_LEN(str) < 2)
goto invalid_format;
7203 if (*s !=
'"')
goto invalid_format;
7221 static const char force_encoding_suffix[] =
".force_encoding(\"";
7222 static const char dup_suffix[] =
".dup";
7223 const char *encname;
7228 size =
sizeof(dup_suffix) - 1;
7229 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
7231 size =
sizeof(force_encoding_suffix) - 1;
7232 if (s_end - s <= size)
goto invalid_format;
7233 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7237 rb_raise(
rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
7241 s = memchr(s,
'"', s_end-s);
7243 if (!s)
goto invalid_format;
7244 if (s_end - s != 2)
goto invalid_format;
7245 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7247 encidx = rb_enc_find_index2(encname, (
long)size);
7251 rb_enc_associate_index(undumped, encidx);
7261 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7264 rb_str_cat(undumped, s++, 1);
7272 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7278 if (rb_enc_dummy_p(enc)) {
7285str_true_enc(
VALUE str)
7288 rb_str_check_dummy_enc(enc);
7292static OnigCaseFoldType
7293check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7298 rb_raise(rb_eArgError,
"too many options");
7299 if (argv[0]==sym_turkic) {
7300 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7302 if (argv[1]==sym_lithuanian)
7303 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7305 rb_raise(rb_eArgError,
"invalid second option");
7308 else if (argv[0]==sym_lithuanian) {
7309 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7311 if (argv[1]==sym_turkic)
7312 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7314 rb_raise(rb_eArgError,
"invalid second option");
7318 rb_raise(rb_eArgError,
"too many options");
7319 else if (argv[0]==sym_ascii)
7320 flags |= ONIGENC_CASE_ASCII_ONLY;
7321 else if (argv[0]==sym_fold) {
7322 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7323 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7325 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
7328 rb_raise(rb_eArgError,
"invalid option");
7335 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() ||
rb_enc_mbmaxlen(enc) == 1))
/* Extra bytes added on top of the per-buffer size estimate when a case-mapping
 * output buffer is allocated (see the capa computation in the mapping loop). */
7341#define CASE_MAPPING_ADDITIONAL_LENGTH 20
/* Switch for the fprintf(stderr, ...) diagnostics in the case-mapping code;
 * defaults to 0 unless it was defined before this point. */
7342#ifndef CASEMAP_DEBUG
7343# define CASEMAP_DEBUG 0
7351 OnigUChar space[FLEX_ARY_LEN];
7355mapping_buffer_free(
void *p)
7359 while (current_buffer) {
7360 previous_buffer = current_buffer;
7361 current_buffer = current_buffer->next;
7362 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7368 {0, mapping_buffer_free,},
7369 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
7377 const OnigUChar *source_current, *source_end;
7378 int target_length = 0;
7379 VALUE buffer_anchor;
7382 size_t buffer_count = 0;
7383 int buffer_length_or_invalid;
7385 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
7387 source_current = (OnigUChar*)RSTRING_PTR(source);
7388 source_end = (OnigUChar*)RSTRING_END(source);
7392 while (source_current < source_end) {
7394 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7395 if (CASEMAP_DEBUG) {
7396 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
7399 *pre_buffer = current_buffer;
/* Advance the tail link: the buffer allocated on the next loop iteration is
 * stored through pre_buffer, i.e. into this buffer's `next` field. */
7400 pre_buffer = &current_buffer->next;
7401 current_buffer->next = NULL;
7402 current_buffer->capa =
capa;
7403 buffer_length_or_invalid = enc->case_map(flags,
7404 &source_current, source_end,
7405 current_buffer->space,
7406 current_buffer->space+current_buffer->capa,
7408 if (buffer_length_or_invalid < 0) {
7409 current_buffer =
DATA_PTR(buffer_anchor);
7411 mapping_buffer_free(current_buffer);
7412 rb_raise(rb_eArgError,
"input string invalid");
7414 target_length += current_buffer->used = buffer_length_or_invalid;
7416 if (CASEMAP_DEBUG) {
7417 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
7420 if (buffer_count==1) {
7421 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7424 char *target_current;
7427 target_current = RSTRING_PTR(target);
7428 current_buffer =
DATA_PTR(buffer_anchor);
7429 while (current_buffer) {
7430 memcpy(target_current, current_buffer->space, current_buffer->used);
7431 target_current += current_buffer->used;
7432 current_buffer = current_buffer->next;
7435 current_buffer =
DATA_PTR(buffer_anchor);
7437 mapping_buffer_free(current_buffer);
7442 str_enc_copy_direct(target, source);
7451 const OnigUChar *source_current, *source_end;
7452 OnigUChar *target_current, *target_end;
7453 long old_length = RSTRING_LEN(source);
7454 int length_or_invalid;
7456 if (old_length == 0)
return Qnil;
7458 source_current = (OnigUChar*)RSTRING_PTR(source);
7459 source_end = (OnigUChar*)RSTRING_END(source);
7460 if (source == target) {
7461 target_current = (OnigUChar*)source_current;
7462 target_end = (OnigUChar*)source_end;
7465 target_current = (OnigUChar*)RSTRING_PTR(target);
7466 target_end = (OnigUChar*)RSTRING_END(target);
7469 length_or_invalid = onigenc_ascii_only_case_map(flags,
7470 &source_current, source_end,
7471 target_current, target_end, enc);
7472 if (length_or_invalid < 0)
7473 rb_raise(rb_eArgError,
"input string invalid");
7474 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7475 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7476 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7477 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
7478 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7481 str_enc_copy(target, source);
7487upcase_single(
VALUE str)
7489 char *s = RSTRING_PTR(str), *send = RSTRING_END(str);
7490 bool modified =
false;
7493 unsigned int c = *(
unsigned char*)s;
7495 if (
'a' <= c && c <=
'z') {
7496 *s =
'A' + (c -
'a');
7524rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7527 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7529 flags = check_case_options(argc, argv, flags);
7530 str_modify_keep_cr(str);
7531 enc = str_true_enc(str);
7532 if (case_option_single_p(flags, enc, str)) {
7533 if (upcase_single(str))
7534 flags |= ONIGENC_CASE_MODIFIED;
7536 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7537 rb_str_ascii_casemap(str, str, &flags, enc);
7539 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7541 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7563rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
7566 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7569 flags = check_case_options(argc, argv, flags);
7570 enc = str_true_enc(str);
7571 if (case_option_single_p(flags, enc, str)) {
7572 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
7573 str_enc_copy_direct(ret, str);
7576 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7578 rb_str_ascii_casemap(str, ret, &flags, enc);
7581 ret = rb_str_casemap(str, &flags, enc);
7588downcase_single(
VALUE str)
7590 char *s = RSTRING_PTR(str), *send = RSTRING_END(str);
7591 bool modified =
false;
7594 unsigned int c = *(
unsigned char*)s;
7596 if (
'A' <= c && c <=
'Z') {
7597 *s =
'a' + (c -
'A');
7626rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
7629 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
7631 flags = check_case_options(argc, argv, flags);
7632 str_modify_keep_cr(str);
7633 enc = str_true_enc(str);
7634 if (case_option_single_p(flags, enc, str)) {
7635 if (downcase_single(str))
7636 flags |= ONIGENC_CASE_MODIFIED;
7638 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7639 rb_str_ascii_casemap(str, str, &flags, enc);
7641 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7643 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7665rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
7668 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
7671 flags = check_case_options(argc, argv, flags);
7672 enc = str_true_enc(str);
7673 if (case_option_single_p(flags, enc, str)) {
7674 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
7675 str_enc_copy_direct(ret, str);
7676 downcase_single(ret);
7678 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7680 rb_str_ascii_casemap(str, ret, &flags, enc);
7683 ret = rb_str_casemap(str, &flags, enc);
7711rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
7714 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
7716 flags = check_case_options(argc, argv, flags);
7717 str_modify_keep_cr(str);
7718 enc = str_true_enc(str);
7719 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
7720 if (flags&ONIGENC_CASE_ASCII_ONLY)
7721 rb_str_ascii_casemap(str, str, &flags, enc);
7723 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7725 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7749rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
7752 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
7755 flags = check_case_options(argc, argv, flags);
7756 enc = str_true_enc(str);
7757 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
7758 if (flags&ONIGENC_CASE_ASCII_ONLY) {
7760 rb_str_ascii_casemap(str, ret, &flags, enc);
7763 ret = rb_str_casemap(str, &flags, enc);
7790rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
7793 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
7795 flags = check_case_options(argc, argv, flags);
7796 str_modify_keep_cr(str);
7797 enc = str_true_enc(str);
7798 if (flags&ONIGENC_CASE_ASCII_ONLY)
7799 rb_str_ascii_casemap(str, str, &flags, enc);
7801 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7803 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7827rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
7830 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
7833 flags = check_case_options(argc, argv, flags);
7834 enc = str_true_enc(str);
7835 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
7836 if (flags&ONIGENC_CASE_ASCII_ONLY) {
7838 rb_str_ascii_casemap(str, ret, &flags, enc);
7841 ret = rb_str_casemap(str, &flags, enc);
7846typedef unsigned char *USTR;
7850 unsigned int now, max;
7862 if (t->p == t->pend)
return -1;
7863 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
7866 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
7868 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
7870 if (t->p < t->pend) {
7871 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
7874 if (t->now < 0x80 && c < 0x80) {
7875 rb_raise(rb_eArgError,
7876 "invalid range \"%c-%c\" in string transliteration",
7880 rb_raise(rb_eArgError,
"invalid range in string transliteration");
7884 else if (t->now < c) {
7893 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
7894 if (t->now == t->max) {
7899 if (t->now < t->max) {
7915 const unsigned int errc = -1;
7916 unsigned int trans[256];
7918 struct tr trsrc, trrepl;
7920 unsigned int c, c0, last = 0;
7921 int modify = 0, i, l;
7922 unsigned char *s, *send;
7924 int singlebyte = single_byte_optimizable(str);
/* Updates the `cr` variable of the enclosing scope: when it is still
 * ENC_CODERANGE_7BIT and c is not ASCII according to rb_isascii(), it is set
 * to ENC_CODERANGE_VALID; otherwise it is left alone.  The (void) cast
 * discards the value of the conditional expression. */
7928#define CHECK_IF_ASCII(c) \
7929 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
7930 (cr = ENC_CODERANGE_VALID) : 0)
7934 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
7935 if (RSTRING_LEN(repl) == 0) {
7936 return rb_str_delete_bang(1, &src, str);
7940 e1 = rb_enc_check(str, src);
7941 e2 = rb_enc_check(str, repl);
7946 enc = rb_enc_check(src, repl);
7948 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
7949 if (RSTRING_LEN(src) > 1 &&
7950 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
7951 trsrc.p + l < trsrc.pend) {
7955 trrepl.p = RSTRING_PTR(repl);
7956 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
7957 trsrc.gen = trrepl.gen = 0;
7958 trsrc.now = trrepl.now = 0;
7959 trsrc.max = trrepl.max = 0;
7962 for (i=0; i<256; i++) {
7965 while ((c = trnext(&trsrc, enc)) != errc) {
7970 if (!hash) hash = rb_hash_new();
7974 while ((c = trnext(&trrepl, enc)) != errc)
7977 for (i=0; i<256; i++) {
7978 if (trans[i] != errc) {
7986 for (i=0; i<256; i++) {
7989 while ((c = trnext(&trsrc, enc)) != errc) {
7990 r = trnext(&trrepl, enc);
7991 if (r == errc) r = trrepl.now;
7994 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
7997 if (!hash) hash = rb_hash_new();
8005 str_modify_keep_cr(str);
8006 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)RSTRING_END(str);
8010 long offset, max = RSTRING_LEN(str);
8011 unsigned int save = -1;
8012 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8017 c0 = c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, e1);
8018 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8027 if (cflag) c = last;
8030 else if (cflag) c = errc;
8036 if (c != (
unsigned int)-1) {
8042 tlen = rb_enc_codelen(c, enc);
8048 if (enc != e1) may_modify = 1;
8050 if ((offset = t - buf) + tlen > max) {
8051 size_t MAYBE_UNUSED(old) = max + termlen;
8052 max = offset + tlen + (send - s);
8053 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8056 rb_enc_mbcput(c, t, enc);
8057 if (may_modify && memcmp(s, t, tlen) != 0) {
8063 if (!STR_EMBED_P(str)) {
8064 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8066 TERM_FILL((
char *)t, termlen);
8067 RSTRING(str)->as.heap.ptr = (
char *)buf;
8068 STR_SET_LEN(str, t - buf);
8069 STR_SET_NOEMBED(str);
8070 RSTRING(str)->as.heap.aux.capa = max;
8074 c = (
unsigned char)*s;
8075 if (trans[c] != errc) {
8092 long offset, max = (long)((send - s) * 1.2);
8093 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8097 c0 = c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, e1);
8098 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8106 if (cflag) c = last;
8109 else if (cflag) c = errc;
8113 c = cflag ? last : errc;
8116 tlen = rb_enc_codelen(c, enc);
8121 if (enc != e1) may_modify = 1;
8123 if ((offset = t - buf) + tlen > max) {
8124 size_t MAYBE_UNUSED(old) = max + termlen;
8125 max = offset + tlen + (long)((send - s) * 1.2);
8126 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8130 rb_enc_mbcput(c, t, enc);
8131 if (may_modify && memcmp(s, t, tlen) != 0) {
8139 if (!STR_EMBED_P(str)) {
8140 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8142 TERM_FILL((
char *)t, termlen);
8143 RSTRING(str)->as.heap.ptr = (
char *)buf;
8144 STR_SET_LEN(str, t - buf);
8145 STR_SET_NOEMBED(str);
8146 RSTRING(str)->as.heap.aux.capa = max;
8152 rb_enc_associate(str, enc);
8171 return tr_trans(str, src, repl, 0);
8218 tr_trans(str, src, repl, 0);
/* Number of table entries addressable by one byte value (0..UCHAR_MAX). */
8222#define TR_TABLE_MAX (UCHAR_MAX+1)
/* Full table size: one extra trailing slot at index TR_TABLE_MAX holds a flag
 * (tr_setup_table stores cflag there and tr_find reads it back). */
8223#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
8225tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8228 const unsigned int errc = -1;
8229 char buf[TR_TABLE_MAX];
8232 VALUE table = 0, ptable = 0;
8233 int i, l, cflag = 0;
8235 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
8236 tr.gen =
tr.now =
tr.max = 0;
8238 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
8243 for (i=0; i<TR_TABLE_MAX; i++) {
8246 stable[TR_TABLE_MAX] = cflag;
8248 else if (stable[TR_TABLE_MAX] && !cflag) {
8249 stable[TR_TABLE_MAX] = 0;
8251 for (i=0; i<TR_TABLE_MAX; i++) {
8255 while ((c = trnext(&
tr, enc)) != errc) {
8256 if (c < TR_TABLE_MAX) {
8257 buf[(
unsigned char)c] = !cflag;
8262 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8265 table = ptable ? ptable : rb_hash_new();
8269 table = rb_hash_new();
8274 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
8275 rb_hash_aset(table, key,
Qtrue);
8279 for (i=0; i<TR_TABLE_MAX; i++) {
8280 stable[i] = stable[i] && buf[i];
8282 if (!table && !cflag) {
8289tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8291 if (c < TR_TABLE_MAX) {
8292 return table[c] != 0;
8298 if (!
NIL_P(rb_hash_lookup(del, v)) &&
8299 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
8303 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
8306 return table[TR_TABLE_MAX] ? TRUE : FALSE;
8320rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8322 char squeez[TR_TABLE_SIZE];
8325 VALUE del = 0, nodel = 0;
8327 int i, ascompat, cr;
8329 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8331 for (i=0; i<argc; i++) {
8335 enc = rb_enc_check(str, s);
8336 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8339 str_modify_keep_cr(str);
8340 ascompat = rb_enc_asciicompat(enc);
8341 s = t = RSTRING_PTR(str);
8342 send = RSTRING_END(str);
8348 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8359 c = rb_enc_codepoint_len(s, send, &clen, enc);
8361 if (tr_find(c, squeez, del, nodel)) {
8365 if (t != s) rb_enc_mbcput(c, t, enc);
8372 TERM_FILL(t, TERM_LEN(str));
8373 STR_SET_LEN(str, t - RSTRING_PTR(str));
8376 if (modify)
return str;
8396rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8399 rb_str_delete_bang(argc, argv, str);
8413rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8415 char squeez[TR_TABLE_SIZE];
8417 VALUE del = 0, nodel = 0;
8418 unsigned char *s, *send, *t;
8420 int ascompat, singlebyte = single_byte_optimizable(str);
8424 enc = STR_ENC_GET(str);
8427 for (i=0; i<argc; i++) {
8431 enc = rb_enc_check(str, s);
8432 if (singlebyte && !single_byte_optimizable(s))
8434 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8438 str_modify_keep_cr(str);
8439 s = t = (
unsigned char *)RSTRING_PTR(str);
8440 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
8441 send = (
unsigned char *)RSTRING_END(str);
8443 ascompat = rb_enc_asciicompat(enc);
8447 unsigned int c = *s++;
8448 if (c != save || (argc > 0 && !squeez[c])) {
8458 if (ascompat && (c = *s) < 0x80) {
8459 if (c != save || (argc > 0 && !squeez[c])) {
8465 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
8467 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8468 if (t != s) rb_enc_mbcput(c, t, enc);
8477 TERM_FILL((
char *)t, TERM_LEN(str));
8478 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
8479 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
8483 if (modify)
return str;
8506rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8509 rb_str_squeeze_bang(argc, argv, str);
8527 return tr_trans(str, src, repl, 1);
8550 tr_trans(str, src, repl, 1);
8579rb_str_count(
int argc,
VALUE *argv,
VALUE str)
8581 char table[TR_TABLE_SIZE];
8583 VALUE del = 0, nodel = 0, tstr;
8593 enc = rb_enc_check(str, tstr);
8596 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
8597 (ptstr = RSTRING_PTR(tstr),
8598 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
8599 !is_broken_string(str)) {
8601 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
8603 s = RSTRING_PTR(str);
8604 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8605 send = RSTRING_END(str);
8607 if (*(
unsigned char*)s++ == c) n++;
8613 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
8614 for (i=1; i<argc; i++) {
8617 enc = rb_enc_check(str, tstr);
8618 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
8621 s = RSTRING_PTR(str);
8622 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8623 send = RSTRING_END(str);
8624 ascompat = rb_enc_asciicompat(enc);
8628 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8636 c = rb_enc_codepoint_len(s, send, &clen, enc);
8637 if (tr_find(c, table, del, nodel)) {
8648rb_fs_check(
VALUE val)
8652 if (
NIL_P(val))
return 0;
/* Lookup table indexed by byte value: entries 9 through 13 and entry 32 are 1,
 * every other entry is 0. */
8657static const char isspacetable[256] = {
8658 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
8659 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8660 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8661 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8662 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8663 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8664 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8665 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8666 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8667 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8668 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8669 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8670 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8671 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8672 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8673 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/* Table lookup for one character; the argument is cast to unsigned char
 * before indexing so a negative char value cannot yield a negative index. */
8676#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
8679split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
8681 if (empty_count >= 0 &&
len == 0) {
8682 return empty_count + 1;
8684 if (empty_count > 0) {
8688 rb_ary_push(result, str_new_empty_String(str));
8689 }
while (--empty_count > 0);
8693 rb_yield(str_new_empty_String(str));
8694 }
while (--empty_count > 0);
8697 str = rb_str_subseq(str, beg,
len);
8699 rb_ary_push(result, str);
8708 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
8712literal_split_pattern(
VALUE spat, split_type_t default_type)
8720 return SPLIT_TYPE_CHARS;
8722 else if (rb_enc_asciicompat(enc)) {
8723 if (
len == 1 && ptr[0] ==
' ') {
8724 return SPLIT_TYPE_AWK;
8729 if (rb_enc_ascget(ptr, ptr +
len, &l, enc) ==
' ' &&
len == l) {
8730 return SPLIT_TYPE_AWK;
8733 return default_type;
8746rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
8751 split_type_t split_type;
8752 long beg, end, i = 0, empty_count = -1;
8757 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
8759 if (lim <= 0) limit =
Qnil;
8760 else if (lim == 1) {
8761 if (RSTRING_LEN(str) == 0)
8772 if (
NIL_P(limit) && !lim) empty_count = 0;
8774 enc = STR_ENC_GET(str);
8775 split_type = SPLIT_TYPE_REGEXP;
8777 spat = get_pat_quoted(spat, 0);
8779 else if (
NIL_P(spat = rb_fs)) {
8780 split_type = SPLIT_TYPE_AWK;
8782 else if (!(spat = rb_fs_check(spat))) {
8783 rb_raise(
rb_eTypeError,
"value of $; must be String or Regexp");
8788 if (split_type != SPLIT_TYPE_AWK) {
8793 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
8794 if (split_type == SPLIT_TYPE_AWK) {
8796 split_type = SPLIT_TYPE_STRING;
8801 mustnot_broken(spat);
8802 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
/* Passes beg and len to split_string() together with the enclosing function's
 * `result`, `str` and `empty_count`, and stores the value split_string()
 * returns back into `empty_count`. */
8810#define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))
8813 char *ptr = RSTRING_PTR(str);
8814 char *eptr = RSTRING_END(str);
8815 if (split_type == SPLIT_TYPE_AWK) {
8820 if (result) result = rb_ary_new();
8822 if (is_ascii_string(str)) {
8823 while (ptr < eptr) {
8824 c = (
unsigned char)*ptr++;
8826 if (ascii_isspace(c)) {
8832 if (!
NIL_P(limit) && lim <= i)
break;
8835 else if (ascii_isspace(c)) {
8836 SPLIT_STR(beg, end-beg);
8839 if (!
NIL_P(limit)) ++i;
8847 while (ptr < eptr) {
8850 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
8859 if (!
NIL_P(limit) && lim <= i)
break;
8863 SPLIT_STR(beg, end-beg);
8866 if (!
NIL_P(limit)) ++i;
8874 else if (split_type == SPLIT_TYPE_STRING) {
8875 char *str_start = ptr;
8876 char *substr_start = ptr;
8877 char *sptr = RSTRING_PTR(spat);
8878 long slen = RSTRING_LEN(spat);
8880 if (result) result = rb_ary_new();
8881 mustnot_broken(str);
8882 enc = rb_enc_check(str, spat);
8883 while (ptr < eptr &&
8884 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
8887 if (t != ptr + end) {
8891 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
8894 if (!
NIL_P(limit) && lim <= ++i)
break;
8896 beg = ptr - str_start;
8898 else if (split_type == SPLIT_TYPE_CHARS) {
8899 char *str_start = ptr;
8902 if (result) result = rb_ary_new_capa(RSTRING_LEN(str));
8903 mustnot_broken(str);
8904 enc = rb_enc_get(str);
8905 while (ptr < eptr &&
8906 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
8907 SPLIT_STR(ptr - str_start, n);
8909 if (!
NIL_P(limit) && lim <= ++i)
break;
8911 beg = ptr - str_start;
8914 if (result) result = rb_ary_new();
8915 long len = RSTRING_LEN(str);
8923 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
8928 if (start == end && BEG(0) == END(0)) {
8933 else if (last_null == 1) {
8934 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
8941 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
8947 SPLIT_STR(beg, end-beg);
8948 beg = start = END(0);
8952 for (idx=1; idx < regs->num_regs; idx++) {
8953 if (BEG(idx) == -1)
continue;
8954 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
8956 if (!
NIL_P(limit) && lim <= ++i)
break;
8958 if (match) rb_match_unbusy(match);
8960 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
8961 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
8964 return result ? result : str;
8974 return rb_str_split_m(1, &sep, str);
/* Evaluates to a new array created with capacity `size` when no block is
 * given, and to 0 when a block is given.  The `m` argument is not used by the
 * expansion. */
8977#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
8983 rb_ary_push(ary, e);
/* Shorthand that forwards both arguments unchanged to enumerator_element(). */
8992#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
8995chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
8997 const char *prev = rb_enc_prev_char(p, e, e, enc);
9000 prev = rb_enc_prev_char(p, e, e, enc);
9001 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
9013 RSTRING_LEN(rs) != 1 ||
9014 RSTRING_PTR(rs)[0] !=
'\n')) {
9020#define rb_rs get_rs()
9027 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9028 long pos,
len, rslen;
9034 static ID keywords[1];
9039 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9043 if (!ENUM_ELEM(ary, str)) {
9051 if (!RSTRING_LEN(str))
goto end;
9052 str = rb_str_new_frozen(str);
9053 ptr = subptr = RSTRING_PTR(str);
9054 pend = RSTRING_END(str);
9055 len = RSTRING_LEN(str);
9057 rslen = RSTRING_LEN(rs);
9059 if (rs == rb_default_rs)
9060 enc = rb_enc_get(str);
9062 enc = rb_enc_check(str, rs);
9067 const char *eol = NULL;
9069 while (subend < pend) {
9070 long chomp_rslen = 0;
9072 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
9074 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
9076 if (eol == subend)
break;
9080 chomp_rslen = -rslen;
9084 if (!subptr) subptr = subend;
9088 }
while (subend < pend);
9090 if (rslen == 0) chomp_rslen = 0;
9091 line = rb_str_subseq(str, subptr - ptr,
9092 subend - subptr + (chomp ? chomp_rslen : rslen));
9093 if (ENUM_ELEM(ary, line)) {
9094 str_mod_check(str, ptr,
len);
9096 subptr = eol = NULL;
9101 rsptr = RSTRING_PTR(rs);
9108 if ((rs == rb_default_rs) && !rb_enc_asciicompat(enc)) {
9111 rsptr = RSTRING_PTR(rs);
9112 rslen = RSTRING_LEN(rs);
9115 while (subptr < pend) {
9116 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9120 if (hit != adjusted) {
9124 subend = hit += rslen;
9127 subend = chomp_newline(subptr, subend, enc);
9133 line = rb_str_subseq(str, subptr - ptr, subend - subptr);
9134 if (ENUM_ELEM(ary, line)) {
9135 str_mod_check(str, ptr,
len);
9140 if (subptr != pend) {
9143 pend = chomp_newline(subptr, pend, enc);
9145 else if (pend - subptr >= rslen &&
9146 memcmp(pend - rslen, rsptr, rslen) == 0) {
9150 line = rb_str_subseq(str, subptr - ptr, pend - subptr);
9151 ENUM_ELEM(ary, line);
9172rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9175 return rb_str_enumerate_lines(argc, argv, str, 0);
9188rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9190 VALUE ary = WANTARRAY(
"lines", 0);
9191 return rb_str_enumerate_lines(argc, argv, str, ary);
9205 for (i=0; i<RSTRING_LEN(str); i++) {
9206 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
9224rb_str_each_byte(
VALUE str)
9227 return rb_str_enumerate_bytes(str, 0);
9239rb_str_bytes(
VALUE str)
9241 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
9242 return rb_str_enumerate_bytes(str, ary);
9259 str = rb_str_new_frozen(str);
9260 ptr = RSTRING_PTR(str);
9261 len = RSTRING_LEN(str);
9262 enc = rb_enc_get(str);
9265 for (i = 0; i <
len; i += n) {
9266 n = rb_enc_fast_mbclen(ptr + i, ptr +
len, enc);
9267 ENUM_ELEM(ary, rb_str_subseq(str, i, n));
9271 for (i = 0; i <
len; i += n) {
9272 n = rb_enc_mbclen(ptr + i, ptr +
len, enc);
9273 ENUM_ELEM(ary, rb_str_subseq(str, i, n));
9293rb_str_each_char(
VALUE str)
9296 return rb_str_enumerate_chars(str, 0);
9308rb_str_chars(
VALUE str)
9311 return rb_str_enumerate_chars(str, ary);
9315rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9320 const char *ptr, *end;
9323 if (single_byte_optimizable(str))
9324 return rb_str_enumerate_bytes(str, ary);
9326 str = rb_str_new_frozen(str);
9327 ptr = RSTRING_PTR(str);
9328 end = RSTRING_END(str);
9329 enc = STR_ENC_GET(str);
9332 c = rb_enc_codepoint_len(ptr, end, &n, enc);
9353rb_str_each_codepoint(
VALUE str)
9356 return rb_str_enumerate_codepoints(str, 0);
9368rb_str_codepoints(
VALUE str)
9371 return rb_str_enumerate_codepoints(str, ary);
9377 int encidx = rb_enc_to_index(enc);
9379 const OnigUChar source_ascii[] =
"\\X";
9380 const OnigUChar *source = source_ascii;
9381 size_t source_len =
sizeof(source_ascii) - 1;
9384#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9385#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9386#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9387#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9388#define CASE_UTF(e) \
9389 case ENCINDEX_UTF_##e: { \
9390 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9391 source = source_UTF_##e; \
9392 source_len = sizeof(source_UTF_##e); \
9395 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9403 regex_t *reg_grapheme_cluster;
9405 int r = onig_new(&reg_grapheme_cluster, source, source + source_len,
9406 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9408 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9409 onig_error_code_to_str(message, r, &einfo);
9410 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9413 return reg_grapheme_cluster;
9419 int encidx = rb_enc_to_index(enc);
9420 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9422 if (encidx == rb_utf8_encindex()) {
9423 if (!reg_grapheme_cluster_utf8) {
9424 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
9427 return reg_grapheme_cluster_utf8;
9436 size_t grapheme_cluster_count = 0;
9438 const char *ptr, *end;
9440 if (!rb_enc_unicode_p(enc)) {
9444 bool cached_reg_grapheme_cluster =
true;
9445 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9446 if (!reg_grapheme_cluster) {
9447 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9448 cached_reg_grapheme_cluster =
false;
9451 ptr = RSTRING_PTR(str);
9452 end = RSTRING_END(str);
9455 OnigPosition
len = onig_match(reg_grapheme_cluster,
9456 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9457 (
const OnigUChar *)ptr, NULL, 0);
9458 if (
len <= 0)
break;
9459 grapheme_cluster_count++;
9463 if (!cached_reg_grapheme_cluster) {
9464 onig_free(reg_grapheme_cluster);
9467 return SIZET2NUM(grapheme_cluster_count);
9471rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9475 const char *ptr0, *ptr, *end;
9477 if (!rb_enc_unicode_p(enc)) {
9478 return rb_str_enumerate_chars(str, ary);
9481 if (!ary) str = rb_str_new_frozen(str);
9483 bool cached_reg_grapheme_cluster =
true;
9484 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9485 if (!reg_grapheme_cluster) {
9486 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9487 cached_reg_grapheme_cluster =
false;
9490 ptr0 = ptr = RSTRING_PTR(str);
9491 end = RSTRING_END(str);
9494 OnigPosition
len = onig_match(reg_grapheme_cluster,
9495 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9496 (
const OnigUChar *)ptr, NULL, 0);
9497 if (
len <= 0)
break;
9498 ENUM_ELEM(ary, rb_str_subseq(str, ptr-ptr0,
len));
9502 if (!cached_reg_grapheme_cluster) {
9503 onig_free(reg_grapheme_cluster);
9523rb_str_each_grapheme_cluster(
VALUE str)
9526 return rb_str_enumerate_grapheme_clusters(str, 0);
9538rb_str_grapheme_clusters(
VALUE str)
9541 return rb_str_enumerate_grapheme_clusters(str, ary);
9545chopped_length(
VALUE str)
9548 const char *p, *p2, *beg, *end;
9550 beg = RSTRING_PTR(str);
9551 end = beg + RSTRING_LEN(str);
9552 if (beg >= end)
return 0;
9553 p = rb_enc_prev_char(beg, end, end, enc);
9555 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
9556 p2 = rb_enc_prev_char(beg, p, end, enc);
9557 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
9573rb_str_chop_bang(
VALUE str)
9575 str_modify_keep_cr(str);
9576 if (RSTRING_LEN(str) > 0) {
9578 len = chopped_length(str);
9579 STR_SET_LEN(str,
len);
9580 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
9599rb_str_chop(
VALUE str)
9601 return rb_str_subseq(str, 0, chopped_length(str));
9605smart_chomp(
VALUE str,
const char *e,
const char *p)
9616 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
9624 if (--e > p && *(e-1) ==
'\r') {
9641 char *pp, *e, *rsptr;
9643 char *
const p = RSTRING_PTR(str);
9644 long len = RSTRING_LEN(str);
9646 if (
len == 0)
return 0;
9648 if (rs == rb_default_rs) {
9649 return smart_chomp(str, e, p);
9652 enc = rb_enc_get(str);
9663 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
9670 while (e > p && *(e-1) ==
'\n') {
9672 if (e > p && *(e-1) ==
'\r')
9678 if (rslen >
len)
return len;
9680 enc = rb_enc_get(rs);
9681 newline = rsptr[rslen-1];
9684 if (newline ==
'\n')
9685 return smart_chomp(str, e, p);
9689 return smart_chomp(str, e, p);
9693 enc = rb_enc_check(str, rs);
9694 if (is_broken_string(rs)) {
9698 if (p[
len-1] == newline &&
9700 memcmp(rsptr, pp, rslen) == 0)) {
9701 if (at_char_boundary(p, pp, e, enc))
9714chomp_rs(
int argc,
const VALUE *argv)
9730 long olen = RSTRING_LEN(str);
9731 long len = chompped_length(str, rs);
9733 str_modify_keep_cr(str);
9734 STR_SET_LEN(str,
len);
9735 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
9752rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
9755 str_modifiable(str);
9756 if (RSTRING_LEN(str) == 0 && argc < 2)
return Qnil;
9757 rs = chomp_rs(argc, argv);
9759 return rb_str_chomp_string(str, rs);
9772rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
9774 VALUE rs = chomp_rs(argc, argv);
9776 return rb_str_subseq(str, 0, chompped_length(str, rs));
9782 const char *
const start = s;
9784 if (!s || s >= e)
return 0;
9787 if (single_byte_optimizable(str)) {
9788 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
9793 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
9813rb_str_lstrip_bang(
VALUE str)
9819 str_modify_keep_cr(str);
9820 enc = STR_ENC_GET(str);
9822 loffset = lstrip_offset(str, start, start+olen, enc);
9824 long len = olen-loffset;
9825 s = start + loffset;
9826 memmove(start, s,
len);
9827 STR_SET_LEN(str,
len);
9851rb_str_lstrip(
VALUE str)
9856 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
9857 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
9858 return rb_str_subseq(str, loffset,
len - loffset);
9866 rb_str_check_dummy_enc(enc);
9870 if (!s || s >= e)
return 0;
9874 if (single_byte_optimizable(str)) {
9876 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
9881 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
9901rb_str_rstrip_bang(
VALUE str)
9907 str_modify_keep_cr(str);
9908 enc = STR_ENC_GET(str);
9910 roffset = rstrip_offset(str, start, start+olen, enc);
9912 long len = olen - roffset;
9914 STR_SET_LEN(str,
len);
9938rb_str_rstrip(
VALUE str)
9944 enc = STR_ENC_GET(str);
9946 roffset = rstrip_offset(str, start, start+olen, enc);
9948 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
9949 return rb_str_subseq(str, 0, olen-roffset);
9964rb_str_strip_bang(
VALUE str)
9967 long olen, loffset, roffset;
9970 str_modify_keep_cr(str);
9971 enc = STR_ENC_GET(str);
9973 loffset = lstrip_offset(str, start, start+olen, enc);
9974 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
9976 if (loffset > 0 || roffset > 0) {
9977 long len = olen-roffset;
9980 memmove(start, start + loffset,
len);
9982 STR_SET_LEN(str,
len);
10006rb_str_strip(
VALUE str)
10009 long olen, loffset, roffset;
10013 loffset = lstrip_offset(str, start, start+olen, enc);
10014 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10016 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10017 return rb_str_subseq(str, loffset, olen-loffset-roffset);
10021scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10024 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10030 end = pos + RSTRING_LEN(pat);
10044 if (RSTRING_LEN(str) > end)
10045 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
10046 RSTRING_END(str), enc);
10054 if (!regs || regs->num_regs == 1) {
10055 result = rb_str_subseq(str, pos, end - pos);
10060 for (
int i = 1; i < regs->num_regs; i++) {
10063 s = rb_str_subseq(str, BEG(i), END(i)-BEG(i));
10066 rb_ary_push(result, s);
10121 long last = -1, prev = 0;
10122 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
10124 pat = get_pat_quoted(pat, 1);
10125 mustnot_broken(str);
10127 VALUE ary = rb_ary_new();
10129 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10132 rb_ary_push(ary, result);
10134 if (last >= 0) rb_pat_search(pat, str, last, 1);
10139 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10143 str_mod_check(str, p,
len);
10145 if (last >= 0) rb_pat_search(pat, str, last, 1);
10169rb_str_hex(
VALUE str)
10171 return rb_str_to_inum(str, 16, FALSE);
10196rb_str_oct(
VALUE str)
10198 return rb_str_to_inum(str, -8, FALSE);
10201#ifndef HAVE_CRYPT_R
10206 rb_nativethread_lock_t lock;
10207} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10210crypt_mutex_initialize(
void)
10281# define CRYPT_END() ALLOCV_END(databuf)
10283 extern char *crypt(
const char *,
const char *);
10284# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10287 const char *s, *saltp;
10290 char salt_8bit_clean[3];
10294 mustnot_wchar(str);
10295 mustnot_wchar(salt);
10297 saltp = RSTRING_PTR(salt);
10298 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10299 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
10303 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10304 salt_8bit_clean[0] = saltp[0] & 0x7f;
10305 salt_8bit_clean[1] = saltp[1] & 0x7f;
10306 salt_8bit_clean[2] =
'\0';
10307 saltp = salt_8bit_clean;
10312# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10313 data->initialized = 0;
10315 res = crypt_r(s, saltp, data);
10317 crypt_mutex_initialize();
10319 res = crypt(s, saltp);
10360 char *ptr, *p, *pend;
10363 unsigned long sum0 = 0;
10368 ptr = p = RSTRING_PTR(str);
10369 len = RSTRING_LEN(str);
10375 str_mod_check(str, ptr,
len);
10378 sum0 += (
unsigned char)*p;
10389 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
10390 sum0 &= (((
unsigned long)1)<<bits)-1;
10410rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
10414 long width,
len, flen = 1, fclen = 1;
10417 const char *f =
" ";
10418 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
10420 int singlebyte = 1, cr;
10424 enc = STR_ENC_GET(str);
10429 enc = rb_enc_check(str, pad);
10430 f = RSTRING_PTR(pad);
10431 flen = RSTRING_LEN(pad);
10432 fclen = str_strlen(pad, enc);
10433 singlebyte = single_byte_optimizable(pad);
10434 if (flen == 0 || fclen == 0) {
10435 rb_raise(rb_eArgError,
"zero width padding");
10438 len = str_strlen(str, enc);
10439 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
10441 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
10445 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
10446 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
10448 size = RSTRING_LEN(str);
10449 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
10450 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
10451 (
len += llen2 + rlen2) >= LONG_MAX - size) {
10452 rb_raise(rb_eArgError,
"argument too big");
10456 p = RSTRING_PTR(res);
10458 memset(p, *f, llen);
10462 while (llen >= fclen) {
10468 memcpy(p, f, llen2);
10472 memcpy(p, RSTRING_PTR(str), size);
10475 memset(p, *f, rlen);
10479 while (rlen >= fclen) {
10485 memcpy(p, f, rlen2);
10489 TERM_FILL(p, termlen);
10490 STR_SET_LEN(res, p-RSTRING_PTR(res));
10491 rb_enc_associate(res, enc);
10513rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
10515 return rb_str_justify(argc, argv, str,
'l');
10529rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
10531 return rb_str_justify(argc, argv, str,
'r');
10546rb_str_center(
int argc,
VALUE *argv,
VALUE str)
10548 return rb_str_justify(argc, argv, str,
'c');
10564 sep = get_pat_quoted(sep, 0);
10573 sep = rb_str_subseq(str, pos, END(0) - pos);
10576 pos = rb_str_index(str, sep, 0);
10577 if (pos < 0)
goto failed;
10579 return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
10581 rb_str_subseq(str, pos+RSTRING_LEN(sep),
10582 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
10585 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
10599 long pos = RSTRING_LEN(str);
10601 sep = get_pat_quoted(sep, 0);
10610 sep = rb_str_subseq(str, pos, END(0) - pos);
10614 pos = rb_str_rindex(str, sep, pos);
10620 return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
10622 rb_str_subseq(str, pos+RSTRING_LEN(sep),
10623 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
10625 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
10637rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
10641 for (i=0; i<argc; i++) {
10642 VALUE tmp = argv[i];
10644 if (rb_reg_start_with_p(tmp, str))
10648 const char *p, *s, *e;
10653 enc = rb_enc_check(str, tmp);
10654 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
10655 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
10656 p = RSTRING_PTR(str);
10659 if (!at_char_right_boundary(p, s, e, enc))
10661 if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
10677rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
10681 for (i=0; i<argc; i++) {
10682 VALUE tmp = argv[i];
10683 const char *p, *s, *e;
10688 enc = rb_enc_check(str, tmp);
10689 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
10690 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
10691 p = RSTRING_PTR(str);
10694 if (!at_char_boundary(p, s, e, enc))
10696 if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
10712deleted_prefix_length(
VALUE str,
VALUE prefix)
10714 const char *strptr, *prefixptr;
10715 long olen, prefixlen;
10720 if (!is_broken_string(prefix) ||
10721 !rb_enc_asciicompat(enc) ||
10722 !rb_enc_asciicompat(rb_enc_get(prefix))) {
10723 enc = rb_enc_check(str, prefix);
10727 prefixlen = RSTRING_LEN(prefix);
10728 if (prefixlen <= 0)
return 0;
10729 olen = RSTRING_LEN(str);
10730 if (olen < prefixlen)
return 0;
10731 strptr = RSTRING_PTR(str);
10732 prefixptr = RSTRING_PTR(prefix);
10733 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
10734 if (is_broken_string(prefix)) {
10735 if (!is_broken_string(str)) {
10739 const char *strend = strptr + olen;
10740 const char *after_prefix = strptr + prefixlen;
10741 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
10761rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
10764 str_modify_keep_cr(str);
10766 prefixlen = deleted_prefix_length(str, prefix);
10767 if (prefixlen <= 0)
return Qnil;
10781rb_str_delete_prefix(
VALUE str,
VALUE prefix)
10785 prefixlen = deleted_prefix_length(str, prefix);
10786 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
10788 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
10801deleted_suffix_length(
VALUE str,
VALUE suffix)
10803 const char *strptr, *suffixptr;
10804 long olen, suffixlen;
10808 if (is_broken_string(suffix))
return 0;
10809 enc = rb_enc_check(str, suffix);
10812 suffixlen = RSTRING_LEN(suffix);
10813 if (suffixlen <= 0)
return 0;
10814 olen = RSTRING_LEN(str);
10815 if (olen < suffixlen)
return 0;
10816 strptr = RSTRING_PTR(str);
10817 suffixptr = RSTRING_PTR(suffix);
10818 const char *strend = strptr + olen;
10819 const char *before_suffix = strend - suffixlen;
10820 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
10821 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
10836rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
10838 long olen, suffixlen,
len;
10839 str_modifiable(str);
10841 suffixlen = deleted_suffix_length(str, suffix);
10842 if (suffixlen <= 0)
return Qnil;
10844 olen = RSTRING_LEN(str);
10845 str_modify_keep_cr(str);
10846 len = olen - suffixlen;
10847 STR_SET_LEN(str,
len);
10848 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10864rb_str_delete_suffix(
VALUE str,
VALUE suffix)
10868 suffixlen = deleted_suffix_length(str, suffix);
10869 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
10871 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
10878 rb_raise(
rb_eTypeError,
"value of %"PRIsVALUE
" must be String", rb_id2str(
id));
10886 val = rb_fs_check(val);
10889 "value of %"PRIsVALUE
" must be String or Regexp",
10893 rb_warn_deprecated(
"`$;'", NULL);
10910 str_modifiable(str);
10913 int idx = rb_enc_to_index(encoding);
10920 rb_enc_associate_index(str, idx);
10944 if (STR_EMBED_P(str)) {
10945 str2 = str_alloc_embed(
rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
10950 str_replace_shared_without_enc(str2, str);
10952 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
10985rb_str_valid_encoding_p(
VALUE str)
10987 int cr = rb_enc_str_coderange(str);
11005rb_str_is_ascii_only_p(
VALUE str)
11007 int cr = rb_enc_str_coderange(str);
11015 static const char ellipsis[] =
"...";
11016 const long ellipsislen =
sizeof(ellipsis) - 1;
11018 const long blen = RSTRING_LEN(str);
11019 const char *
const p = RSTRING_PTR(str), *e = p + blen;
11020 VALUE estr, ret = 0;
11027 else if (
len <= ellipsislen ||
11029 if (rb_enc_asciicompat(enc)) {
11031 rb_enc_associate(ret, enc);
11038 else if (ret = rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
11039 rb_str_cat(ret, ellipsis, ellipsislen);
11043 rb_enc_from_encoding(enc), 0,
Qnil);
11054 cr = rb_enc_str_coderange(str);
11056 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
11062 rb_enc_name(enc), rb_enc_name(e));
11081 if (enc == STR_ENC_GET(str)) {
11086 return enc_str_scrub(enc, str, repl, cr);
11094 const char *rep, *p, *e, *p1, *sp;
11100 rb_raise(rb_eArgError,
"both of block and replacement given");
11107 if (!
NIL_P(repl)) {
11108 repl = str_compat_and_valid(repl, enc);
11111 if (rb_enc_dummy_p(enc)) {
11114 encidx = rb_enc_to_index(enc);
11116#define DEFAULT_REPLACE_CHAR(str) do { \
11117 static const char replace[sizeof(str)-1] = str; \
11118 rep = replace; replen = (int)sizeof(replace); \
11121 slen = RSTRING_LEN(str);
11122 p = RSTRING_PTR(str);
11123 e = RSTRING_END(str);
11127 if (rb_enc_asciicompat(enc)) {
11133 else if (!
NIL_P(repl)) {
11134 rep = RSTRING_PTR(repl);
11135 replen = RSTRING_LEN(repl);
11138 else if (encidx == rb_utf8_encindex()) {
11139 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11143 DEFAULT_REPLACE_CHAR(
"?");
11148 p = search_nonascii(p, e);
11153 int ret = rb_enc_precise_mbclen(p, e, enc);
11167 if (
NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
11172 if (e - p < clen) clen = e - p;
11179 for (; clen > 1; clen--) {
11180 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11191 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11192 str_mod_check(str, sp, slen);
11193 repl = str_compat_and_valid(repl, enc);
11200 p = search_nonascii(p, e);
11215 buf = rb_str_buf_new(RSTRING_LEN(str));
11226 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11227 str_mod_check(str, sp, slen);
11228 repl = str_compat_and_valid(repl, enc);
11241 else if (!
NIL_P(repl)) {
11242 rep = RSTRING_PTR(repl);
11243 replen = RSTRING_LEN(repl);
11245 else if (encidx == ENCINDEX_UTF_16BE) {
11246 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11248 else if (encidx == ENCINDEX_UTF_16LE) {
11249 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11251 else if (encidx == ENCINDEX_UTF_32BE) {
11252 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11254 else if (encidx == ENCINDEX_UTF_32LE) {
11255 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11258 DEFAULT_REPLACE_CHAR(
"?");
11262 int ret = rb_enc_precise_mbclen(p, e, enc);
11272 if (
NIL_P(buf)) buf = rb_str_buf_new(RSTRING_LEN(str));
11275 if (e - p < clen) clen = e - p;
11276 if (clen <= mbminlen * 2) {
11281 for (; clen > mbminlen; clen-=mbminlen) {
11282 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11292 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11293 str_mod_check(str, sp, slen);
11294 repl = str_compat_and_valid(repl, enc);
11309 buf = rb_str_buf_new(RSTRING_LEN(str));
11319 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11320 str_mod_check(str, sp, slen);
11321 repl = str_compat_and_valid(repl, enc);
11357str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
11365static ID id_normalize;
11366static ID id_normalized_p;
11367static VALUE mUnicodeNormalize;
11370unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
11372 static int UnicodeNormalizeRequired = 0;
11375 if (!UnicodeNormalizeRequired) {
11376 rb_require(
"unicode_normalize/normalize.rb");
11377 UnicodeNormalizeRequired = 1;
11381 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
11418rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
11420 return unicode_normalize_common(argc, argv, str, id_normalize);
11434rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
11436 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
11463rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
11465 return unicode_normalize_common(argc, argv, str, id_normalized_p);
11597#define sym_equal rb_obj_equal
11600sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
11604 int c = rb_enc_precise_mbclen(s, send, enc);
11616rb_str_symname_p(
VALUE sym)
11621 rb_encoding *resenc = rb_default_internal_encoding();
11623 if (resenc == NULL) resenc = rb_default_external_encoding();
11624 enc = STR_ENC_GET(sym);
11625 ptr = RSTRING_PTR(sym);
11626 len = RSTRING_LEN(sym);
11627 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(ptr) ||
11635rb_str_quote_unprintable(
VALUE str)
11643 resenc = rb_default_internal_encoding();
11644 if (resenc == NULL) resenc = rb_default_external_encoding();
11645 enc = STR_ENC_GET(str);
11646 ptr = RSTRING_PTR(str);
11647 len = RSTRING_LEN(str);
11648 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
11649 !sym_printable(ptr, ptr +
len, enc)) {
11650 return rb_str_escape(str);
11656rb_id_quote_unprintable(
ID id)
11658 VALUE str = rb_id2str(
id);
11659 if (!rb_str_symname_p(str)) {
11660 return rb_str_escape(str);
11678sym_inspect(
VALUE sym)
11685 if (!rb_str_symname_p(str)) {
11687 len = RSTRING_LEN(str);
11688 rb_str_resize(str,
len + 1);
11689 dest = RSTRING_PTR(str);
11690 memmove(dest + 1, dest,
len);
11695 VALUE orig_str = str;
11698 str = rb_enc_str_new(0,
len + 1, enc);
11699 dest = RSTRING_PTR(str);
11700 memcpy(dest + 1, ptr,
len);
11726rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
11731 rb_raise(rb_eArgError,
"no receiver given");
11828 return rb_str_match(
rb_sym2str(sym), other);
11843sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
11845 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
11858sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
11860 return rb_str_match_m_p(argc, argv, sym);
11878 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
11889sym_length(
VALUE sym)
11903sym_empty(
VALUE sym)
11921 return rb_str_intern(rb_str_upcase(argc, argv,
rb_sym2str(sym)));
11937sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
11939 return rb_str_intern(rb_str_downcase(argc, argv,
rb_sym2str(sym)));
11953sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
11955 return rb_str_intern(rb_str_capitalize(argc, argv,
rb_sym2str(sym)));
11969sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
11971 return rb_str_intern(rb_str_swapcase(argc, argv,
rb_sym2str(sym)));
11983sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
11985 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
11998sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12000 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12012sym_encoding(
VALUE sym)
12018string_for_symbol(
VALUE name)
12037 name = string_for_symbol(name);
12038 return rb_intern_str(name);
12047 name = string_for_symbol(name);
12048 return rb_str_intern(name);
12071 return rb_fstring(str);
12078 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII), TRUE);
12090 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12091 rb_enc_autoload(enc);
12095 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc), TRUE);
12108 assert(rb_vm_fstring_table());
12109 st_foreach(rb_vm_fstring_table(), fstring_set_class_i,
rb_cString);
12274 rb_gc_register_address(&
rb_fs);
#define RUBY_ASSERT(expr)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
#define RUBY_ASSERT_ALWAYS(expr)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_isascii(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isascii(), except it additionally takes an encoding.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
@ RUBY_FL_FREEZE
This flag has something to do with data immutability.
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define NEWOBJ_OF
Old name of RB_NEWOBJ_OF.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define FL_EXIVAR
Old name of RUBY_FL_EXIVAR.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define OBJ_FREEZE_RAW
Old name of RB_OBJ_FREEZE_RAW.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby string instead of a C string.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
static OnigCodePoint rb_enc_mbc_to_codepoint(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_codepoint(), except it assumes the passed character is not broken.
static int rb_enc_mbminlen(rb_encoding *enc)
Queries the minimum number of bytes that the passed encoding needs to represent a character.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns an "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns an "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
#define rb_check_frozen
Just another name of rb_check_frozen_inline.
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single bit of symbols that have ever been interned in the entire history of the current pr...
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
VALUE rb_str_to_interned_str(VALUE str)
Identical to rb_interned_str(), except it takes a Ruby string instead of a C string.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby string instead of a C string.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "defaultexternal" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C string instead of a Ruby string.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C string.
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "defaultexternal" encoding.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks if two strings are comparable with each other or not.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE id)
Identical to rb_id2str(), except it takes an instance of rb_cSymbol rather than an ID.
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
int capa
Designed capacity of the buffer.
int off
Offset inside of ptr.
int len
Length of the buffer.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Defines a function-backed global variable.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except for the return type.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes a C string instead of a Ruby string.
#define errno
Ractor-aware version of errno.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
union RString::@50 as
String's specific fields.
struct RString::@50::@51 heap
Strings that use separated memory region for contents use this pattern.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
struct RString::@50::@52 embed
Embedded contents.
long len
Length of the string, not including terminating NUL character.
union RString::@50::@51::@53 aux
Auxiliary info.
VALUE shared
Parent of the string.
char * ptr
Pointer to the contents of the string.
This is the struct that holds necessary info for a struct.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predicate failure.