Prolog in Haskell - Ergonomics, Terms, and Unification

In the previous log, we have started a naive Prolog implementation.

We had come up with the following odd implementation of Arity/Args:

class (KnownNat a, Eq (Args a)) => Arity (a :: Nat) where
  type Args a :: Type

instance Arity 0 where
  type Args 0 = ()

instance Arity 1 where
  type Args 1 = Atom

instance Arity 2 where
  type Args 2 = (Atom, Atom)

instance Arity 3 where
  type Args 3 = (Atom, Atom, Atom)

Usually, we design type classes around plain types and add "capabilities", let's reverse this as follows:

class (Eq a, Typeable a) => Args a

instance Args ()

instance Args Atom

instance Args (Atom, Atom)

instance Args (Atom, Atom, Atom)

It totally eliminates Arity, which simplify the code at the cost of forcing Typeable, but it enables us to write the following helpers:

predicate :: (Args args) => String -> (args, [Goal]) -> [(args, [Goal])] -> Predicate
predicate name firstClause otherClauses = Predicate (Atom name) (firstClause :| otherClauses)

(@) :: (Args args) => String -> args -> Goal
(@) name = Goal (Atom name)

We can rewrite our tests as follows:

spec :: Spec
spec =
  describe "Prolog" $ do
    it "atom-based facts and atom-based should be valid" $
      hasGoal [fruit] ("fruit" @ Atom "tomato") `shouldBe` True
    it "atom-based facts and atom-based should be invalid" $
      hasGoal [fruit] ("fruit" @ Atom "carrots") `shouldBe` False

-- * Fixtures

fruit :: Predicate
fruit =
  predicate "fruit" (Atom "tomato", []) [(Atom "melon", [])]

Much more expressive!

The next step is to introduce terms as follows:

data Term
  = TAtom Atom
  | TVar Var
  | TGoal Goal

The next step is to introduce unification, it's a fascinating which is something like a bidirectional equality, for example, the following terms unified:

a = a.
42 = 42.
Answer = 42.
question(a) = question(a).
question(X) = question(42). % X = 42.

Each unification is called a Substitution, we can draft a first term-focused unification function, as follows:

type Substitution = Map Var Term

unifyTerm :: Term -> Term -> Maybe Substitution
unifyTerm x y =
  case (x, y) of
    (TAtom x', TAtom y') -> guard (x' == y') *> Just mempty
    (TGoal x', TGoal y') -> guard (x' == y') *> Just mempty
    (TVar v, _) -> Just $ Map.singleton v y
    (_, TVar v) -> Just $ Map.singleton v x
    (TAtom _, TGoal _) -> Nothing
    (TGoal _, TAtom _) -> Nothing

Sadly, to lift it to Args-level, we have to be type-specific, widening Args as follows:

class (Eq a, Show a, Typeable a) => Args a where
  unify :: a -> a -> Maybe Substitution

instance Args () where
  unify () () = Just mempty

instance Args Term where
  unify = unifyTerm

instance Args (Term, Term) where
  unify (x0, x1) (y0, y1) = (<>) <$> unifyTerm x0 y0 <*> unifyTerm x1 y1

instance Args (Term, Term, Term) where
  unify (x0, x1, x2) (y0, y1, y2) = do
    r0 <- unifyTerm x0 y0
    r1 <- unifyTerm x1 y1
    r2 <- unifyTerm x2 y2
    pure $ mconcat [r0, r1, r2]

We can adjust hasGoal in order to replace the hard-coded unification based on equality as follows:

hasGoal :: [Predicate] -> Goal -> Bool
hasGoal ps (Goal gFunctor gArgs) = any go ps
  where
    go (Predicate pFunctor pClauses) =
      case eqTypeRep (typeOf gArgs) (typeOf $ fst $ NonEmpty.head pClauses) of
        Just HRefl
          | gFunctor == pFunctor ->
              any (\(cArgs, goals) -> isJust (unify cArgs gArgs) && all (hasGoal ps) goals) pClauses
        _ -> False

We can try it, adding the following tests:

it "rules facts and atom-based goal should be valid" $
  hasGoal [fruit, salad, fruitSalad] ("fruitSalad" @ atom "melon") `shouldBe` True
it "rules facts and atom-based goal should be invalid" $
  hasGoal [fruit, salad, fruitSalad] ("fruitSalad" @ atom "tomato") `shouldBe` False

Unfortunately, the second test is failing because there are no interactions between each term unification, noticeably between arguments and goals.

To properly solve this problem, we have to somehow accumulate and apply Substitution on Var along the way, we can start with unifyTerm as follows:

unifyTerm :: Substitution -> Term -> Term -> Maybe Substitution
unifyTerm ss x y =
  case (sub x, sub y) of
    (TAtom x', TAtom y') -> guard (x' == y') *> Just mempty
    (TGoal x', TGoal y') -> guard (x' == y') *> Just mempty
    (TVar v, _) -> Just $ Map.singleton v y
    (_, TVar v) -> Just $ Map.singleton v x
    (TAtom _, TGoal _) -> Nothing
    (TGoal _, TAtom _) -> Nothing
  where
    sub :: Term -> Term
    sub =
      \case
        TVar v | Just t <- Map.lookup v ss -> t
        o -> o

The next step is to update Args instances in order to accumulate Substitutions, as follows:

class (Eq a, Show a, Typeable a) => Args a where
  unify :: Substitution -> a -> a -> Maybe Substitution

instance Args () where
  unify _ () () = Just mempty

instance Args Term where
  unify = unifyTerm

instance Args (Term, Term) where
  unify ss (x0, x1) (y0, y1) = do
    r0 <- unifyTerm ss x0 y0
    r1 <- unifyTerm (ss <> r0) x1 y1
    pure $ mconcat [r0, r1]

instance Args (Term, Term, Term) where
  unify ss (x0, x1, x2) (y0, y1, y2) = do
    r0 <- unifyTerm ss x0 y0
    r1 <- unifyTerm (ss <> r0) x1 y1
    r2 <- unifyTerm (ss <> r0 <> r1) x2 y2
    pure $ mconcat [r0, r1, r2]

The final step, for this already-too-long-log, is to adjust hasGoal, but there is a design defect: we do not produce Substitutions, so we cannot use recursion on it, we have to split it.

The other defect is that, we have limited ourselves to the first match, while we might have many.

It gives the following design:

hasGoal :: [Predicate] -> Goal -> Bool
hasGoal ps = not . null . query mempty ps

query :: Substitution -> [Predicate] -> Goal -> [Substitution]
query ss ps (Goal gFunctor gArgs) = do
  Predicate pFunctor pClauses <- ps
  guard (gFunctor == pFunctor)
  (cArgs, body) <- toList pClauses

  case eqTypeRep (typeOf gArgs) (typeOf cArgs) of
    Just HRefl -> do
      ssa <- maybeToList $ unify ss cArgs gArgs
      solveGoals (ss <> ssa) body
    Nothing -> []
  where
    solveGoals :: Substitution -> [Goal] -> [Substitution]
    solveGoals ss' =
      \case
        [] -> [ss']
        (g : gs) -> do
          ss'' <- query ss' ps g
          solveGoals ss'' gs

Our implementation is still far to be complete:

  • Variables are mixed-up, when there are rules-recursions, same-named variable will go in the same set
  • Substitution are accumulated via (<>), which indistinctly
  • Produce Substitution contains intermediary Var

But that'll be the content of next logs.