CプリプロセッサでBrainfuck

Cプリプロセッサ上で実装されたBrainfuckインタプリタです。

$ cat hello.txt
# include "bfi.h"

BFI_RUN1(x x x x x x x x x (b x x x x x x x x b x x x x x x x x x x x b x x x x x d d d _) b w b x x w x x x x x x x w w x x x w b _ w
    _ _ _ _ _ _ _ _ _ _ _ _ w d x x x x x x x x w _ _ _ _ _ _ _ _ w x x x w _ _ _ _ _ _ w _ _ _ _ _ _ _ _ w b x w)
$ cpp -P hello.txt


Hello, world!

使い方

"bfi.h"で定義されているBFI_RUNマクロがインタプリタの本体です。第一引数にBrainfuckプログラム、第二引数に標準入力の内容を与えると、そのプログラムを実行し、出力をpretty printします。標準入力に何も与えたくない場合はBFI_RUN1マクロが使えます。

Cプリプロセッサでは通常の+-><.,[]を使ったBF構文をパースできないので、代わりにx_bdwrLRを使って下さい。例えば、標準入力を標準出力にコピーするプログラム

,[.[-],]

は、次のようになります。

r L w L _ R r R

LRの代わりに通常の括弧()を使うこともできます。こちらの方が読みやすいでしょう。ただしこの場合、括弧の中身が空であってはなりません。

r (w (_) r)

ppsyntax.cはこの変換を行なうスクリプトです。

BFI_RUNの第二引数には、標準入力の内容をバイト列で指定します。各バイトは二桁の十六進数で、a〜fには小文字を使ってください。文字コードはASCIIです。

BFI_RUN(r (w (_) r), 0x61 0x62 0x63) /* => abc */

英数字およびアンダーバーについては、その文字を直接書くこともできます。また、丸括弧()は、入れ子構造が正しく、中身が空でないという条件のもとで直接書くことができます。その他、0commaでコンマ、0_nで改行など略記法が定義されています。詳しくはlex.hを読んでください。

BFI_RUN(r (w (_) r), a b c (F 0comma 0space G) ) /* => abc(F, G) */

仕様

中身

BFI_RUNは大まかに次のように定義されています。

# define BFI_RUN(code, input) \
  BFI_FORMAT_PRETTY(          \
    BFI_EXEC(                 \
      BFI_BUILD(              \
        BFI_OPTIMIZE(         \
          BFI_GEN_FLAT(       \
            BFI_LEX(code)))), \
      BFI_LEX(input)))

例として、Daniel B. CristofaniのHello, worldを使います。

++++++++[>++++[>++>+++>+++>+<<<<-]>+>->+>>+[<]<-]>>.>
>---.+++++++..+++.>.<<-.>.+++.------.--------.>+.>++.

プリプロセッサ構文では、

x x x x x x x x (b x x x x (b x x b x x x b x x x b x d d d d _) b x b _ b x b b x (d) d _) b b w b
b _ _ _ w x x x x x x x w w x x x w b w d d _ w b w x x x w _ _ _ _ _ _ w _ _ _ _ _ _ _ _ w b x w b x x w

これをBFI_LEXに通すと、ASCIIコードからなる列が得られます。

BFI_LEX(
  x x x x x x x x (b x x x x (b x x b x x x b x x x b x d d d d _) b x b _ b x b b x (d) d _) b b w b
  b _ _ _ w x x x x x x x w w x x x w b w d d _ w b w x x x w _ _ _ _ _ _ w _ _ _ _ _ _ _ _ w b x w b x x w)
  /* =>
      (0x78)(0x78)(0x78)(0x78)(0x78)(0x78)(0x78)(0x78)(0x28)(0x62)(0x78)(0x78)(0x78)(0x78)(0x28)(0x62)(0x78)
      (0x78)(0x62)(0x78)(0x78)(0x78)(0x62)(0x78)(0x78)(0x78)(0x62)(0x78)(0x64)(0x64)(0x64)(0x64)(0x5f)(0x29)
      (0x62)(0x78)(0x62)(0x5f)(0x62)(0x78)(0x62)(0x62)(0x78)(0x28)(0x64)(0x29)(0x64)(0x5f)(0x29)(0x62)(0x62)
      (0x77)(0x62)(0x62)(0x5f)(0x5f)(0x5f)(0x77)(0x78)(0x78)(0x78)(0x78)(0x78)(0x78)(0x78)(0x77)(0x77)(0x78)
      (0x78)(0x78)(0x77)(0x62)(0x77)(0x64)(0x64)(0x5f)(0x77)(0x62)(0x77)(0x78)(0x78)(0x78)(0x77)(0x5f)(0x5f)
      (0x5f)(0x5f)(0x5f)(0x5f)(0x77)(0x5f)(0x5f)(0x5f)(0x5f)(0x5f)(0x5f)(0x5f)(0x5f)(0x77)(0x62)(0x78)(0x77)
      (0x62)(0x78)(0x78)(0x77)(0eof) */

BFI_GEN_FLATは、このバイト列を中間コードに変換します。

BFI_GEN_FLAT(
  (0x78)(0x78)(0x78)(0x78)(0x78)(0x78)(0x78)(0x78)(0x28)(0x62)(0x78)(0x78)(0x78)(0x78)(0x28)(0x62)(0x78)
  (0x78)(0x62)(0x78)(0x78)(0x78)(0x62)(0x78)(0x78)(0x78)(0x62)(0x78)(0x64)(0x64)(0x64)(0x64)(0x5f)(0x29)
  (0x62)(0x78)(0x62)(0x5f)(0x62)(0x78)(0x62)(0x62)(0x78)(0x28)(0x64)(0x29)(0x64)(0x5f)(0x29)(0x62)(0x62)
  (0x77)(0x62)(0x62)(0x5f)(0x5f)(0x5f)(0x77)(0x78)(0x78)(0x78)(0x78)(0x78)(0x78)(0x78)(0x77)(0x77)(0x78)
  (0x78)(0x78)(0x77)(0x62)(0x77)(0x64)(0x64)(0x5f)(0x77)(0x62)(0x77)(0x78)(0x78)(0x78)(0x77)(0x5f)(0x5f)
  (0x5f)(0x5f)(0x5f)(0x5f)(0x77)(0x5f)(0x5f)(0x5f)(0x5f)(0x5f)(0x5f)(0x5f)(0x5f)(0x77)(0x62)(0x78)(0x77)
  (0x62)(0x78)(0x78)(0x77)(0eof)
) /* =>
      (6add(0x08))(6loop)(6next)(6add(0x04))(6loop)(6next)(6add(0x02))(6next)(6add(0x03))(6next)(6add(0x03))
      (6next)(6inc)(6prev)(6prev)(6prev)(6prev)(6dec)(6end(6loop))(6next)(6inc)(6next)(6dec)(6next)(6inc)(6next)
      (6next)(6inc)(6loop)(6prev)(6end(6loop))(6prev)(6dec)(6end(6loop))(6next)(6next)(6write)(6next)(6next)
      (6add(0xfd))(6write)(6add(0x07))(6write)(6write)(6add(0x03))(6write)(6next)(6write)(6prev)(6prev)(6dec)
      (6write)(6next)(6write)(6add(0x03))(6write)(6add(0xfa))(6write)(6add(0xf8))(6write)(6next)(6inc)(6write)
      (6next)(6add(0x02))(6write) */

隣り合う+命令と-命令はこの段階でまとめられます。

次に、BFI_OPTIMIZEが中間コードの最適化を行います。

BFI_OPTIMIZE(
  (6add(0x08))(6loop)(6next)(6add(0x04))(6loop)(6next)(6add(0x02))(6next)(6add(0x03))(6next)(6add(0x03))
  (6next)(6inc)(6prev)(6prev)(6prev)(6prev)(6dec)(6end(6loop))(6next)(6inc)(6next)(6dec)(6next)(6inc)(6next)
  (6next)(6inc)(6loop)(6prev)(6end(6loop))(6prev)(6dec)(6end(6loop))(6next)(6next)(6write)(6next)(6next)
  (6add(0xfd))(6write)(6add(0x07))(6write)(6write)(6add(0x03))(6write)(6next)(6write)(6prev)(6prev)(6dec)
  (6write)(6next)(6write)(6add(0x03))(6write)(6add(0xfa))(6write)(6add(0xf8))(6write)(6next)(6inc)(6write)
  (6next)(6add(0x02))(6write)
) /* =>
      (6add(0x08))(6loop)(6next1)(6add(0x04))(6senter)(6next1)(6sadd(0x02))(6next1)(6sadd(0x03))(6next1)
      (6sadd(0x03))(6next1)(6sadd(0x01))(6prevA(0x04))(6sexit)(6next1)(6inc)(6next1)(6dec)(6next1)(6inc)
      (6nextA(0x02))(6inc)(6loop)(6prev1)(6end(6loopA))(6prev1)(6dec)(6end(6loopA))(6nextA(0x02))(6writeA)
      (6nextA(0x02))(6add(0xfd))(6writeA)(6add(0x07))(6writeA)(6writeB)(6add(0x03))(6writeA)(6next1)(6writeA)
      (6prevA(0x02))(6dec)(6writeA)(6next1)(6writeA)(6add(0x03))(6writeA)(6add(0xfa))(6writeA)(6add(0xf8))
      (6writeA)(6next1)(6inc)(6writeA)(6next1)(6add(0x02))(6writeA) */

内側の単純なループが除去されました。

BFI_BUILDは、ネストのない中間コードからネストのある仮想マシンコードを生成します。

BFI_BUILD(
  (6add(0x08))(6loop)(6next1)(6add(0x04))(6senter)(6next1)(6sadd(0x02))(6next1)(6sadd(0x03))(6next1)
  (6sadd(0x03))(6next1)(6sadd(0x01))(6prevA(0x04))(6sexit)(6next1)(6inc)(6next1)(6dec)(6next1)(6inc)
  (6nextA(0x02))(6inc)(6loop)(6prev1)(6end(6loopA))(6prev1)(6dec)(6end(6loopA))(6nextA(0x02))(6writeA)
  (6nextA(0x02))(6add(0xfd))(6writeA)(6add(0x07))(6writeA)(6writeB)(6add(0x03))(6writeA)(6next1)(6writeA)
  (6prevA(0x02))(6dec)(6writeA)(6next1)(6writeA)(6add(0x03))(6writeA)(6add(0xfa))(6writeA)(6add(0xf8))
  (6writeA)(6next1)(6inc)(6writeA)(6next1)(6add(0x02))(6writeA)
) /* =>
      (6add,0x08)(6loopA,(6next1,~)(6add,0x04)(6senter,~)(6next1,~)(6sadd,0x02)(6next1,~)(6sadd,0x03)(6next1,~)
      (6sadd,0x03)(6next1,~)(6sadd,0x01)(6prevA,0x04)(6sexit,~)(6next1,~)(6inc,0x01)(6next1,~)(6dec,0xff)
      (6next1,~)(6inc,0x01)(6nextA,0x02)(6inc,0x01)(6loopA,(6prev1,~))(6prev1,~)(6dec,0xff))(6nextA,0x02)
      (6writeA,~)(6nextA,0x02)(6add,0xfd)(6writeA,~)(6add,0x07)(6writeA,~)(6writeB,~)(6add,0x03)(6writeA,~)
      (6next1,~)(6writeA,~)(6prevA,0x02)(6dec,0xff)(6writeA,~)(6next1,~)(6writeA,~)(6add,0x03)(6writeA,~)
      (6add,0xfa)(6writeA,~)(6add,0xf8)(6writeA,~)(6next1,~)(6inc,0x01)(6writeA,~)(6next1,~)(6add,0x02)(6writeA,~) */

BFI_EXECでこのコードを実行すると、出力バイト列が得られます。

BFI_EXEC(
  (6add,0x08)(6loopA,(6next1,~)(6add,0x04)(6senter,~)(6next1,~)(6sadd,0x02)(6next1,~)(6sadd,0x03)(6next1,~)
  (6sadd,0x03)(6next1,~)(6sadd,0x01)(6prevA,0x04)(6sexit,~)(6next1,~)(6inc,0x01)(6next1,~)(6dec,0xff)
  (6next1,~)(6inc,0x01)(6nextA,0x02)(6inc,0x01)(6loopA,(6prev1,~))(6prev1,~)(6dec,0xff))(6nextA,0x02)
  (6writeA,~)(6nextA,0x02)(6add,0xfd)(6writeA,~)(6add,0x07)(6writeA,~)(6writeB,~)(6add,0x03)(6writeA,~)
  (6next1,~)(6writeA,~)(6prevA,0x02)(6dec,0xff)(6writeA,~)(6next1,~)(6writeA,~)(6add,0x03)(6writeA,~)
  (6add,0xfa)(6writeA,~)(6add,0xf8)(6writeA,~)(6next1,~)(6inc,0x01)(6writeA,~)(6next1,~)(6add,0x02)(6writeA,~),
  (0eof)
) /* => (0x48)(0x65)(0x6c)(0x6c)(0x6f)(0x20)(0x57)(0x6f)(0x72)(0x6c)(0x64)(0x21)(0x0a)(0eof) */

最後に、この出力をBFI_FORMAT_PRETTYに通せば最終結果が得られます。

BFI_FORMAT_PRETTY((0x48)(0x65)(0x6c)(0x6c)(0x6f)(0x20)(0x57)(0x6f)(0x72)(0x6c)(0x64)(0x21)(0x0a)(0eof))
  /* => Hello, World!'\n' */

Cプリプロセッサ上で言語処理系を書くために仮想マシンを使うというアイディアと、そのための仮想マシンの効率的な実装は、Order Preprocessorを参考にしています。