Enumerator とブロックの省略の続き

「現在のイテレータの回数を返す組み込み変数とかの方が嬉しそうです。」

という案を試しに実装してみました。

3.times { p $# }
%w(a b c).each {|x| p [x, $#] }
$ ./ruby foo.rb
0
1
2
["a", 0]
["b", 1]
["c", 2]
3.times do
  i = $#
  3.times { p [i, $#] }
end
$ ./ruby foo.rb
[0, 0]
[0, 1]
[0, 2]
[1, 0]
[1, 1]
[1, 2]
[2, 0]
[2, 1]
[2, 2]

すごく便利そうなんだけどすごく邪悪な使い方ができそうで不安になります。バグもありそうだし、ruby-dev に投げる勇気はありませんでした。

以下パッチ。

Index: include/ruby/node.h
===================================================================
--- include/ruby/node.h	(revision 14072)
+++ include/ruby/node.h	(working copy)
@@ -131,6 +131,8 @@
 #define NODE_CONST       NODE_CONST
     NODE_CVAR,
 #define NODE_CVAR        NODE_CVAR
+    NODE_ITER_CNT,
+#define NODE_ITER_CNT    NODE_ITER_CNT
     NODE_NTH_REF,
 #define NODE_NTH_REF     NODE_NTH_REF
     NODE_BACK_REF,
@@ -412,6 +414,7 @@
 #define NEW_IVAR(v) NEW_NODE(NODE_IVAR,v,0,0)
 #define NEW_CONST(v) NEW_NODE(NODE_CONST,v,0,0)
 #define NEW_CVAR(v) NEW_NODE(NODE_CVAR,v,0,0)
+#define NEW_ITER_CNT(v)  NEW_NODE(NODE_ITER_CNT,v,0,0)
 #define NEW_NTH_REF(n)  NEW_NODE(NODE_NTH_REF,0,n,0)
 #define NEW_BACK_REF(n) NEW_NODE(NODE_BACK_REF,0,n,0)
 #define NEW_MATCH(c) NEW_NODE(NODE_MATCH,c,0,0)
Index: vm_core.h
===================================================================
--- vm_core.h	(revision 14072)
+++ vm_core.h	(working copy)
@@ -162,6 +162,8 @@
     int last_line;
     int flip_cnt;
     const rb_compile_option_t *option;
+    ID current_iter_cnt_id;
+    int iter_cnt_size;
 };
 
 #if 1
Index: compile.c
===================================================================
--- compile.c	(revision 14072)
+++ compile.c	(working copy)
@@ -111,12 +111,16 @@
 static int iseq_set_local_table(rb_iseq_t *iseq, ID *tbl);
 static int iseq_set_exception_local_table(rb_iseq_t *iseq);
 static int iseq_set_arguments(rb_iseq_t *iseq, LINK_ANCHOR *anchor, NODE * node);
+static int iseq_set_iter_cnt(rb_iseq_t *iseq, LINK_ANCHOR *anchor, NODE *node);
+static int iseq_inc_iter_cnt(rb_iseq_t *iseq, LINK_ANCHOR *anchor, NODE *node);
 
 static int iseq_set_sequence_stackcaching(rb_iseq_t *iseq, LINK_ANCHOR *anchor);
 static int iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *anchor);
 static int iseq_set_exception_table(rb_iseq_t *iseq);
 static int iseq_set_optargs_table(rb_iseq_t *iseq);
 
+static int get_dyna_var_idx_at_raw(rb_iseq_t *iseq, ID id);
+
 static int
 iseq_add_mark_object(rb_iseq_t *iseq, VALUE v)
 {
@@ -151,6 +155,8 @@
 	iseq_set_local_table(iseq, node->nd_tbl);
 	iseq_set_arguments(iseq, ret, node->nd_args);
 
+	iseq_set_iter_cnt(iseq, ret, node);
+
 	switch (iseq->type) {
 	  case ISEQ_TYPE_BLOCK: {
 	    LABEL *start = iseq->compile_data->start_label = NEW_LABEL(0);
@@ -182,6 +188,8 @@
 	    break;
 	  }
 	}
+
+	iseq_inc_iter_cnt(iseq, ret, node);
     }
     else {
 	switch (iseq->type) {
@@ -915,14 +923,17 @@
 static int
 iseq_set_local_table(rb_iseq_t *iseq, ID *tbl)
 {
-    int size;
+    int size, icnt_size;
+    ID icnt_id;
 
     if (tbl) {
-	size = *tbl;
-	tbl++;
+	size = tbl[0];
+	icnt_size = tbl[1];
+	icnt_id = tbl[2];
+	tbl += 3;
     }
     else {
-	size = 0;
+	size = icnt_size = icnt_id = 0;
     }
 
     if (size > 0) {
@@ -931,6 +942,8 @@
     }
 
     iseq->local_size = iseq->local_table_size = size;
+    iseq->compile_data->iter_cnt_size = icnt_size;
+    iseq->compile_data->current_iter_cnt_id = icnt_id;
 
     if (iseq->type == ISEQ_TYPE_METHOD ||
 	iseq->type == ISEQ_TYPE_CLASS  ||
@@ -942,6 +955,43 @@
     return COMPILE_OK;
 }
 
+static int
+iseq_set_iter_cnt(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE *node)
+{
+    int i;
+    if (iseq->compile_data->iter_cnt_size) {
+	for (i=0; i < iseq->compile_data->iter_cnt_size; i++) {
+	    ADD_INSN1(ret, nd_line(node), putobject, INT2FIX(0));
+	    ADD_INSN2(ret, nd_line(node), setdynamic,
+		      INT2FIX(iseq->local_size - iseq->arg_size - i),
+		      INT2FIX(0));
+	}
+    }
+    return COMPILE_OK;
+}
+
+static int
+iseq_inc_iter_cnt(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE *node)
+{
+    int idx, lv, ls;
+    if (iseq->compile_data->current_iter_cnt_id) {
+	idx = get_dyna_var_idx_at_raw(iseq->parent_iseq,
+				      iseq->compile_data->current_iter_cnt_id);
+	if (idx < 0) {
+	    rb_bug("unknown iter_cnt (%s)",
+		   rb_id2name(iseq->compile_data->current_iter_cnt_id));
+	}
+	idx = iseq->parent_iseq->local_size - idx;
+	ADD_INSN2(ret, nd_line(node), getdynamic, INT2FIX(idx),
+		  INT2FIX(1));
+	ADD_INSN1(ret, nd_line(node), putobject, INT2FIX(1));
+	ADD_INSN(ret, nd_line(node), opt_plus);
+	ADD_INSN2(ret, nd_line(node), setdynamic, INT2FIX(idx),
+		  INT2FIX(1));
+    }
+    return COMPILE_OK;
+}
+
 /**
   ruby insn object array -> raw instruction sequence
  */
@@ -3890,6 +3940,19 @@
 	}
 	break;
       }
+      case NODE_ITER_CNT:{
+	int idx;
+	if (!poped) {
+	    idx = get_dyna_var_idx_at_raw(iseq->parent_iseq, node->nd_vid);
+	    if (idx < 0) {
+		rb_bug("unknown iter_cnt (%s)", rb_id2name(node->nd_vid));
+	    }
+	    ADD_INSN2(ret, nd_line(node), getdynamic,
+		      INT2FIX(iseq->parent_iseq->local_size - idx),
+		      INT2FIX(1));
+	}
+	break;
+      }
       case NODE_NTH_REF:{
         if (!poped) {
 	    ADD_INSN2(ret, nd_line(node), getspecial, INT2FIX(1) /* '~'  */,
Index: parse.y
===================================================================
--- parse.y	(revision 14072)
+++ parse.y	(working copy)
@@ -110,9 +110,16 @@
     struct vtable *prev;
 };
 
+struct icnt_stack {
+    ID id;
+    struct icnt_stack *prev;
+};
+
 struct local_vars {
     struct vtable *args;
+    struct vtable *icnt;
     struct vtable *vars;
+    struct icnt_stack *icnt_stack;
     struct local_vars *prev;
 };
 
@@ -190,6 +197,23 @@
     }
     return 0;
 }
+
+static struct icnt_stack *
+icnt_stack_alloc(struct icnt_stack *prev)
+{
+    struct icnt_stack *icnt = ALLOC(struct icnt_stack);
+    icnt->id = 0;
+    icnt->prev = prev;
+    return icnt;
+}
+
+static void
+icnt_stack_free(struct icnt_stack *icnt)
+{
+    if (POINTER_P(icnt)) {
+	xfree(icnt);
+    }
+}
 #endif
 
 /*
@@ -403,6 +427,8 @@
 
 static void rb_backref_error_gen(struct parser_params*,NODE*);
 #define rb_backref_error(n) rb_backref_error_gen(parser,n)
+static void rb_iter_cnt_error_gen(struct parser_params*);
+#define rb_iter_cnt_error() rb_iter_cnt_error_gen(parser)
 static NODE *node_assign_gen(struct parser_params*,NODE*,NODE*);
 #define node_assign(node1, node2) node_assign_gen(parser, node1, node2)
 
@@ -424,6 +450,9 @@
 static ID   internal_id_gen(struct parser_params*);
 #define internal_id() internal_id_gen(parser)
 
+static ID iter_cnt_gen(struct parser_params*); /* internal ID backing $# */
+#define iter_cnt() iter_cnt_gen(parser)
+
 static void dyna_push_gen(struct parser_params*);
 #define dyna_push() dyna_push_gen(parser)
 static void dyna_pop_gen(struct parser_params*);
@@ -645,6 +674,7 @@
 %token <id>   tIDENTIFIER tFID tGVAR tIVAR tCONSTANT tCVAR tLABEL
 %token <node> tINTEGER tFLOAT tSTRING_CONTENT tCHAR
 %token <node> tNTH_REF tBACK_REF
+%token <node> tITER_CNT
 %token <num>  tREGEXP_END
 
 %type <node> singleton strings string string1 xstring regexp
@@ -1102,6 +1132,16 @@
 			$$ = dispatch3(opassign, $$, $4, $5);
 		    %*/
 		    }
+		| tITER_CNT tOP_ASGN command_call
+		    {
+		    /*%%%*/
+			rb_iter_cnt_error();
+			$$ = NEW_BEGIN(0);
+		    /*%
+			$$ = dispatch2(assign, dispatch1(var_field, $1), $3);
+			$$ = dispatch1(assign_error, $$);
+		    %*/
+		    }
 		| backref tOP_ASGN command_call
 		    {
 		    /*%%%*/
@@ -1554,6 +1594,16 @@
 			$$ = dispatch1(top_const_field, $2);
 		    %*/
 		    }
+		| tITER_CNT
+		    {
+		    /*%%%*/
+			rb_iter_cnt_error();
+			$$ = NEW_BEGIN(0);
+		    /*%
+			$$ = dispatch1(var_field, $1);
+			$$ = dispatch1(assign_error, $$);
+		    %*/
+		    }
 		| backref
 		    {
 		    /*%%%*/
@@ -1632,6 +1682,15 @@
 			}
 		    %*/
 		    }
+		| tITER_CNT
+		    {
+		    /*%%%*/
+			rb_iter_cnt_error();
+			$$ = NEW_BEGIN(0);
+		    /*%
+			$$ = dispatch1(assign_error, $1);
+		    %*/
+		    }
 		| backref
 		    {
 		    /*%%%*/
@@ -1919,6 +1978,17 @@
 			$$ = dispatch1(assign_error, $$);
 		    %*/
 		    }
+		| tITER_CNT tOP_ASGN arg
+		    {
+		    /*%%%*/
+			rb_iter_cnt_error();
+			$$ = NEW_BEGIN(0);
+		    /*%
+			$$ = dispatch1(var_field, $1);
+			$$ = dispatch3(opassign, $$, $2, $3);
+			$$ = dispatch1(assign_error, $$);
+		    %*/
+		    }
 		| backref tOP_ASGN arg
 		    {
 		    /*%%%*/
@@ -2496,6 +2566,15 @@
 		| qwords
 		| var_ref
 		| backref
+		| tITER_CNT
+		    {
+		    /*%%%*/
+			ID id = iter_cnt(); /* reports an error when not inside a block */
+			$$ = NEW_ITER_CNT(id);
+		    /*%
+			$$ = dispatch1(var_ref, $1);
+		    %*/
+		    }
 		| tFID
 		    {
 		    /*%%%*/
@@ -2751,7 +2830,7 @@
 			 *  e.each{|x| a, = x}
 			 */
 			ID id = internal_id();
-			ID *tbl = ALLOC_N(ID, 2);
+			ID *tbl = ALLOC_N(ID, 4);
 			NODE *m = NEW_ARGS_AUX(0, 0);
 			NODE *args, *scope;
 
@@ -2781,7 +2860,7 @@
 
 			args = new_args(m, 0, id, 0, 0);
 			scope = NEW_NODE(NODE_SCOPE, tbl, $8, args);
-			tbl[0] = 1; tbl[1] = id;
+			tbl[0] = 1; tbl[1] = tbl[2] = 0; tbl[3] = id;
 			$$ = NEW_FOR(0, $5, scope);
 			fixpos($$, $2);
 		    /*%
@@ -7021,6 +7100,9 @@
 	    set_yylval_node(NEW_NTH_REF(atoi(tok()+1)));
 	    return tNTH_REF;
 
+	  case '#':
+	    return tITER_CNT;
+
 	  default:
 	    if (!parser_is_identchar()) {
 		pushback(c);
@@ -7689,6 +7771,12 @@
 }
 
 static void
+rb_iter_cnt_error_gen(struct parser_params *parser)
+{
+    compile_error(PARSER_ARG "Can't set variable $#");
+}
+
+static void
 rb_backref_error_gen(struct parser_params *parser, NODE *node)
 {
     switch (nd_type(node)) {
@@ -8295,7 +8383,9 @@
     local = ALLOC(struct local_vars);
     local->prev = lvtbl;
     local->args = vtable_alloc(0);
+    local->icnt = vtable_alloc(inherit_dvars ? DVARS_INHERIT : DVARS_TOPSCOPE);
     local->vars = vtable_alloc(inherit_dvars ? DVARS_INHERIT : DVARS_TOPSCOPE);
+    local->icnt_stack = icnt_stack_alloc(0);
     lvtbl = local;
 }
 
@@ -8304,7 +8394,9 @@
 {
     struct local_vars *local = lvtbl->prev;
     vtable_free(lvtbl->args);
+    vtable_free(lvtbl->icnt);
     vtable_free(lvtbl->vars);
+    icnt_stack_free(lvtbl->icnt_stack);
     xfree(lvtbl);
     lvtbl = local;
 }
@@ -8315,7 +8407,6 @@
     int i, cnt = vtable_size(src);
 
     if (cnt > 0) {
-        buf[0] = cnt;
         for (i = 0; i < cnt; i++) {
             buf[i] = src->tbl[i];
         }
@@ -8327,14 +8418,20 @@
 static ID*
 local_tbl_gen(struct parser_params *parser)
 {
-    int cnt = vtable_size(lvtbl->args) + vtable_size(lvtbl->vars);
+    int cnt_args = vtable_size(lvtbl->args);
+    int cnt_icnt = vtable_size(lvtbl->icnt);
+    int cnt_vars = vtable_size(lvtbl->vars);
+    int cnt_all = cnt_args + cnt_icnt + cnt_vars;
     ID *buf;
 
-    if (cnt <= 0) return 0;
-    buf = ALLOC_N(ID, cnt + 1);
-    vtable_tblcpy(buf+1, lvtbl->args);
-    vtable_tblcpy(buf+vtable_size(lvtbl->args)+1, lvtbl->vars);
-    buf[0] = cnt;
+    if (cnt_all <= 0 && lvtbl->icnt_stack->id == 0) return 0;
+    buf = ALLOC_N(ID, cnt_all + 3);
+    vtable_tblcpy(buf+3, lvtbl->args);
+    vtable_tblcpy(buf+cnt_args+3, lvtbl->icnt);
+    vtable_tblcpy(buf+cnt_args+cnt_icnt+3, lvtbl->vars);
+    buf[0] = cnt_all;
+    buf[1] = cnt_icnt;
+    buf[2] = lvtbl->icnt_stack->id;
     return buf;
 }
 
@@ -8353,6 +8450,36 @@
     return vtable_size(lvtbl->vars) - 1;
 }
 
+static ID
+iter_cnt_gen(struct parser_params *parser)
+{
+    if (POINTER_P(lvtbl->icnt) && POINTER_P(lvtbl->icnt->prev)) {
+	if (lvtbl->icnt_stack->id) {
+	    return lvtbl->icnt_stack->id;
+	}
+	else {
+	    ID id;
+	    struct vtable *args, *icnt, *vars;
+
+	    args = lvtbl->args; lvtbl->args = lvtbl->args->prev;
+	    icnt = lvtbl->icnt; lvtbl->icnt = lvtbl->icnt->prev;
+	    vars = lvtbl->vars; lvtbl->vars = lvtbl->vars->prev;
+	    id = internal_id();
+	    lvtbl->args = args;
+	    lvtbl->icnt = icnt;
+	    lvtbl->vars = vars;
+
+	    lvtbl->icnt_stack->id = id;
+	    vtable_add(lvtbl->icnt->prev, id);
+	    return id;
+	}
+    }
+    else {
+	yyerror("$# cannot be used out of block");
+    }
+    return 0;
+}
+
 static int
 local_id_gen(struct parser_params *parser, ID id)
 {
@@ -8379,20 +8506,29 @@
 dyna_push_gen(struct parser_params *parser)
 {
     lvtbl->args = vtable_alloc(lvtbl->args);
+    lvtbl->icnt = vtable_alloc(lvtbl->icnt);
     lvtbl->vars = vtable_alloc(lvtbl->vars);
+    lvtbl->icnt_stack = icnt_stack_alloc(lvtbl->icnt_stack);
 }
 
 static void
 dyna_pop_gen(struct parser_params *parser)
 {
     struct vtable *tmp;
+    struct icnt_stack *icnt;
 
     tmp = lvtbl->args;
     lvtbl->args = lvtbl->args->prev;
     vtable_free(tmp);
+    tmp = lvtbl->icnt;
+    lvtbl->icnt = lvtbl->icnt->prev;
+    vtable_free(tmp);
     tmp = lvtbl->vars;
     lvtbl->vars = lvtbl->vars->prev;
     vtable_free(tmp);
+    icnt = lvtbl->icnt_stack;
+    lvtbl->icnt_stack = lvtbl->icnt_stack->prev;
+    icnt_stack_free(icnt);
 }
 
 static int
@@ -8431,6 +8567,7 @@
 dvar_curr_gen(struct parser_params *parser, ID id)
 {
     return (vtable_included(lvtbl->args, id) ||
+	    vtable_included(lvtbl->icnt, id) ||
 	    vtable_included(lvtbl->vars, id));
 }
 
@@ -8646,7 +8783,8 @@
 static ID
 internal_id_gen(struct parser_params *parser)
 {
-    ID id = (ID)vtable_size(lvtbl->args) + (ID)vtable_size(lvtbl->vars);
+    ID id = (ID)vtable_size(lvtbl->args) + (ID)vtable_size(lvtbl->icnt) +
+	    (ID)vtable_size(lvtbl->vars);
     id += ((tLAST_TOKEN - ID_INTERNAL) >> ID_SCOPE_SHIFT) + 1;
     return ID_INTERNAL | (id << ID_SCOPE_SHIFT);
 }
@@ -9108,6 +9246,7 @@
         xfree(p->parser_tokenbuf);
     }
     for (local = p->parser_lvtbl; local; local = prev) {
+	if (local->icnt) xfree(local->icnt);
 	if (local->vars) xfree(local->vars);
 	prev = local->prev;
 	xfree(local);

$# が出現する箇所の parent_iseq に dvar のエントリを増やす感じです。$# を使わない限りほとんどパフォーマンス変化しないつもり。パースのコストは若干増えるけど。あと Ripper 周りは適当です。