Haskell: actual IO monad implementation, in different language?
How is IO monad actually implemented?in sense of, what would be the actual implementation of the main
function?
How would I call haskell function (IO) from another language and do I in that case need to maintain IO my self?
Does main
pulls IO actions (Lazily) as references and then call them?
Or it is interpreter job, when it found actions in its way it can call them?
Or maybe something else?
Is there good IO monad implementation in different language which can help to deeply understand what happening in main function?
Edit:
Such hGetContents
confuses me a lot, and makes me unsure how IO is really implemented.
Ok, let's say I have very simple pure Haskell interpreter witch unfortunately has no IO support and for curiosity I want to add this IO actions to it (unsafeIO
tricks also). It is hard to get it from GHC, Hugs or others.
Here is an example of how one could implement the IO monad in Java:
package so.io;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import static so.io.IOMonad.*;
import static so.io.ConsoleIO.*;
/**
* This is a type containing no data -- corresponds to () in Haskell.
*/
class Unit {
public final static Unit VALUE = new Unit();
}
/**
* This type represents a function from A to R
*/
interface Function<A,R> {
public R apply(A argument);
}
/**
* This type represents an action, yielding type R
*/
interface IO<R> {
/**
* Warning! May have arbitrary side-effects!
*/
R unsafePerformIO();
}
/**
* This class, internally impure, provides pure interface for action sequencing (aka Monad)
*/
class IOMonad {
static <T> IO<T> pure(final T value) {
return new IO<T>() {
@Override
public T unsafePerformIO() {
return value;
}
};
}
static <T> IO<T> join(final IO<IO<T>> action) {
return new IO<T>(){
@Override
public T unsafePerformIO() {
return action.unsafePerformIO().unsafePerformIO();
}
};
}
static <A,B> IO<B> fmap(final Function<A,B> func, final IO<A> action) {
return new IO<B>(){
@Override
public B unsafePerformIO() {
return func.apply(action.unsafePerformIO());
}
};
}
static <A,B> IO<B> bind(IO<A> action, Function<A, IO<B>> func) {
return join(fmap(func, action));
}
}
/**
* This class, internally impure, provides pure interface for interaction with stdin and stdout
*/
class ConsoleIO {
static IO<Unit> putStrLn(final String line) {
return new IO<Unit>() {
@Override
public Unit unsafePerformIO() {
System.out.println(line);
return Unit.VALUE;
}
};
};
// Java does not have first-class functions, thus this:
final static Function<String, IO<Unit>> putStrLn = new Function<String, IO<Unit>>() {
@Override
public IO<Unit> apply(String argument) {
return putStrLn(argument);
}
};
final static BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
static IO<String> getLine = new IO<String>() {
@Override
public String unsafePerformIO() {
try {
return in.readLine();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
};
}
/**
* The program composed out of IO actions in a purely functional manner.
*/
class Main {
/**
* A variant of bind, which discards the bound value.
*/
static IO<Unit> bind_(final IO<Unit> a, final IO<Unit> b) {
return bind(a, new Function<Unit, IO<Unit>>(){
@Override
public IO<Unit> apply(Unit argument) {
return b;
}
});
}
/**
* The greeting action -- asks the user for his name and then prints a greeting
*/
final static IO<Unit> greet =
bind_(putStrLn("Enter your name:"),
bind(getLine, new Function<String, IO<Unit>>(){
@Override
public IO<Unit> apply(String argument) {
return putStrLn("Hello, " + argument + "!");
}
}));
/**
* A simple echo action -- reads a line, prints it back
*/
final static IO<Unit> echo = bind(getLine, putStrLn);
/**
* A function taking some action and producing the same action run repeatedly forever (modulo stack overflow :D)
*/
static IO<Unit> loop(final IO<Unit> action) {
return bind(action, new Function<Unit, IO<Unit>>(){
@Override
public IO<Unit> apply(Unit argument) {
return loop(action);
}
});
}
/**
* The action corresponding to the whole program
*/
final static IO<Unit> main = bind_(greet, bind_(putStrLn("Entering the echo loop."),loop(echo)));
}
/**
* The runtime system, doing impure stuff to actually run our program.
*/
public class RTS {
public static void main(String[] args) {
Main.main.unsafePerformIO();
}
}
This is a runtime system implementing interface to the console I/O together with a small purely functional program which greets the user and then runs an echo loop.
One can't implement the unsafe part in Haskell because Haskell is purely functional language. It is always implemented with lower-level facilities.
With Java 8 Lambdas, you can take the code from Rotsor's answer above, remove the Function class as Java 8 provides a FunctionalInterface with does the same thing and remove the anonymous class cruft to achieve cleaner looking code like so:
package so.io;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.function.Function;
import static so.io.IOMonad.*;
import static so.io.ConsoleIO.*;
/**
* This is a type containing no data -- corresponds to () in Haskell.
*/
class Unit {
// -- Unit$
public final static Unit VALUE = new Unit();
private Unit() {
}
}
/** This type represents an action, yielding type R */
@FunctionalInterface
interface IO<R> {
/** Warning! May have arbitrary side-effects! */
R unsafePerformIO();
}
/**
* This, internally impure, provides pure interface for action sequencing (aka
* Monad)
*/
interface IOMonad {
// -- IOMonad$
static <T> IO<T> pure(final T value) {
return () -> value;
}
static <T> IO<T> join(final IO<IO<T>> action) {
return () -> action.unsafePerformIO().unsafePerformIO();
}
static <A, B> IO<B> fmap(final Function<A, B> func, final IO<A> action) {
return () -> func.apply(action.unsafePerformIO());
}
static <A, B> IO<B> bind(IO<A> action, Function<A, IO<B>> func) {
return join(fmap(func, action));
}
}
/**
* This, internally impure, provides pure interface for interaction with stdin
* and stdout
*/
interface ConsoleIO {
// -- ConsoleIO$
static IO<Unit> putStrLn(final String line) {
return () -> {
System.out.println(line);
return Unit.VALUE;
};
};
final static Function<String, IO<Unit>> putStrLn = arg -> putStrLn(arg);
final static BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
static IO<String> getLine = () -> {
try {
return in.readLine();
}
catch (IOException e) {
throw new RuntimeException(e);
}
};
}
/** The program composed out of IO actions in a purely functional manner. */
interface Main {
// -- Main$
/** A variant of bind, which discards the bound value. */
static IO<Unit> bind_(final IO<Unit> a, final IO<Unit> b) {
return bind(a, arg -> b);
}
/**
* The greeting action -- asks the user for his name and then prints
* greeting
*/
final static IO<Unit> greet = bind_(putStrLn("Enter your name:"),
bind(getLine, arg -> putStrLn("Hello, " + arg + "!")));
/** A simple echo action -- reads a line, prints it back */
final static IO<Unit> echo = bind(getLine, putStrLn);
/**
* A function taking some action and producing the same action run repeatedly
* forever (modulo stack overflow :D)
*/
static IO<Unit> loop(final IO<Unit> action) {
return bind(action, arg -> loop(action));
}
/** The action corresponding to the whole program */
final static IO<Unit> main = bind_(greet, bind_(putStrLn("Entering the echo loop."), loop(echo)));
}
/** The runtime system, doing impure stuff to actually run our program. */
public interface RTS {
// -- RTS$
public static void main(String[] args) {
Main.main.unsafePerformIO();
}
}
Note that I also changed class-declared static methods to interface-declared static methods. Why? No particular reason, just that you can in Java 8.
If you want to understand the implementation of the IO monad, it's very well described in an award-winning paper by Phil Wadler and Simon Peyton Jones, who were the ones who figured out how to use monads to do input/output in a pure language. The paper is Imperative Functional Programming and is on both authors' web sites.
The IO
monad is basically implemented as a state transformer (similar to State
), with a special token RealWorld
. Each IO operation depends on this token and passes it when it finishes. unsafeInterleaveIO
introduces a second token, so that a new IO operation can start, while the other one is still doing its work.
Usually, you don't have to care about the implementation. If you want to call IO-functions from other languages, GHC cares about removing the IO wrapper. Consider this small snippet:
printInt :: Int -> IO ()
printInt int = do putStr "The argument is: "
print int
foreign export ccall printInt :: Int -> IO ()
This generates a symbol to call printInt
from C. The function becomes:
extern void printInt(HsInt a1);
Where HsInt
is just a (depending on your platform) typedef
d int
. So you see, the monad IO
has been removed completely.
Below is the actual implementation of IO
in GHC 7.10.
The IO
type is essentially a state monad at type State# RealWorld
(defined in GHC.Types
):
{- |
A value of type @'IO' a@ is a computation which, when performed,
does some I\/O before returning a value of type @a@.
There is really only one way to \"perform\" an I\/O action: bind it to
@Main.main@ in your program. When your program is run, the I\/O will
be performed. It isn't possible to perform I\/O from an arbitrary
function, unless that function is itself in the 'IO' monad and called
at some point, directly or indirectly, from @Main.main@.
'IO' is a monad, so 'IO' actions can be combined using either the do-notation
or the '>>' and '>>=' operations from the 'Monad' class.
-}
newtype IO a = IO (State# RealWorld -> (# State# RealWorld, a #))
The IO
monad is strict, because bindIO
is defined by case
matching (defined in GHC.Base
):
instance Monad IO where
{-# INLINE return #-}
{-# INLINE (>>) #-}
{-# INLINE (>>=) #-}
m >> k = m >>= \ _ -> k
return = returnIO
(>>=) = bindIO
fail s = failIO s
returnIO :: a -> IO a
returnIO x = IO $ \ s -> (# s, x #)
bindIO :: IO a -> (a -> IO b) -> IO b
bindIO (IO m) k = IO $ \ s -> case m s of (# new_s, a #) -> unIO (k a) new_s
This implementation is discussed in a blog post by Edward Yang.
I will leave the question of implementing IO to other people who know a bit more. (Though I will point out, as I'm sure they will as well, that the real question isn't "How is IO implemented in Haskell?" but rather "How is IO implemented in GHC?" or "How is IO implemented in Hugs?", etc. I imagine the implementations vary enormously.) However, this question:
how to call haskell function (IO) from another language and do I in that case need to maintain IO my self?
...is answered in depth in the FFI specification.
In fact "IO a" is just "() -> a" in an impure language (where functions can have side effect). Let's say you want to implement IO in SML :
structure Io : MONAD =
struct
type 'a t = unit -> 'a
return x = fn () => x
fun (ma >>= g) () = let a = ma ()
in g a ()
executeIo ma = ma ()
end
Note: my experience with Clean is minimal - consider yourself warned!
Based on System.IO, using a variation of an approach described by F. Warren Burton:
-
definition module System.Alt.IO from Control.Applicative import class pure, class <*>, class Applicative from Data.Functor import class Functor from Control.Monad import class Monad from StdOverloaded import class toString from System._OI import OI :: IO a = IO .(*OI -> a) execIO :: !(IO a) !*World -> *World evalIO :: !(IO a) !*World -> *(a, !*World) withOI :: (*OI -> .a) -> IO .a putStr :: String -> IO () putStrLn :: String -> IO () print :: a -> IO () | toString a getChar :: IO Char getLine :: IO String readFileM :: !String -> IO String writeFileM :: !String !String -> IO () instance Functor IO instance pure IO instance <*> IO instance Monad IO unsafePerformIO :: !(*OI -> .a) -> .a unsafePerformIOTrue :: !(*OI -> a) -> Bool
-
implementation module System.Alt.IO import StdFile from StdFunc import o, id import StdMisc import StdString import System._OI import Control.Applicative import Control.Monad import Data.Functor from Text import class Text (trim), instance Text String execIO :: !(IO a) !*World -> *World execIO (IO g) world # (u, world) = newOI world #! x = g u = world evalIO :: !(IO a) !*World -> *(a, !*World) evalIO (IO g) world # (u, world) = newOI world #! x = g u = (x, world) withWorld :: (*World -> *(.a, *World)) -> IO .a withWorld f = IO g where g u # (x, world) = f (getWorld u) = from_world "withWorld" x world instance Functor IO where fmap f x = x >>= (lift o f) instance pure IO where pure x = IO (\u -> case partOI u of (_, _) = x) instance <*> IO where (<*>) f g = liftA2 id f g instance Monad IO where bind ma a2mb = IO (run ma) where run (IO g) u # (u1, u2) = partOI u #! x = g u1 # (IO k) = a2mb x = k u2 putStr :: String -> IO () putStr str = withWorld f where f world # (out, world) = stdio world # out = fwrites str out # (_, world) = fclose out world = ((), world) putStrLn :: String -> IO () putStrLn str = putStr (str +++ "\n") print :: a -> IO () | toString a print x = putStrLn (toString x) getChar :: IO Char getChar = withWorld f where f world # (input, world) = stdio world # (ok, c, input) = freadc input # (_, world) = fclose input world = (c, world) getLine :: IO String getLine = withWorld f where f world # (input, world) = stdio world # (str, input) = freadline input # (_, world) = fclose input world = (trim str, world) readFileM :: !String -> IO String readFileM name = withWorld f where f world # (ok, file, world) = fopen name FReadText world # (str, file) = freads file 16777216 # (ok, world) = fclose file world = (str, world) writeFileM :: !String !String -> IO () writeFileM name txt = withWorld f where f world # (ok, file, world) = fopen name FWriteText world # file = fwrites txt file # (ok, world) = fclose file world = ((), world) unsafePerformIO :: !(*OI -> .a) -> .a unsafePerformIO f #! x = f make_oi = x unsafePerformIOTrue :: !(*OI -> a) -> Bool unsafePerformIOTrue f #! x = f make_oi = True make_oi # (u, w) = newOI make_world = from_world "make_oi" u w from_world :: String a !*World -> a from_world name x world | world_to_true world = x | otherwise = abort ("error in +++ name +++ "\n") world_to_true :: !*World -> Bool world_to_true world = code inline { pop_a 1 pushB TRUE } make_world = code { fillI 65536 0 }
-
definition module System._OI from StdFunc import id :: OI partOI :: *OI -> *(*OI, *OI) newOI :: *World -> *(*OI, *World) getWorld :: *OI -> *World
-
implementation module System._OI :: OI = OI partOI :: !*OI -> *(*OI, *OI) // NOTE - this may need partOI u // to be defined with # u1 = OI // ABC instructions # u2 = id OI = (u1, u2) newOI :: !*World -> *(*OI, *World) // ...or: Start :: *OI -> () newOI world = (OI, world) getWorld :: !*OI -> *World // only needed because I/O getWorld OI // operations in Clean 3.0 = code inline { fillA 65536 0 } // rely on World values
精彩评论