% \documentclass{jfp}
% \bibliographystyle{jfp}
\documentclass{sigplanconf}
\usepackage{natbib}
\bibpunct();A{},
\let\cite=\citep
\usepackage{me}

\usepackage{url}
\usepackage[english]{babel}
%\usepackage[]{inputenc}
%\usepackage[T1]{fontenc}
\usepackage{coqdoc}
\usepackage{xspace}
\usepackage{abbrevs}
\usepackage{coq}
\usepackage{bussproofs}
\usepackage{tikz}
\usetikzlibrary{shapes}
% \usepackage{pgfpages}
% \pgfpagesuselayout{resize to}[a4paper]
\usepackage{cond}

% Optional to turn on the short abbreviations
\EnableBpAbbreviations
\def\ScoreOverhang{3pt}			% How much underlines extend out
\def\defaultHypSeparation{\hskip.05in}   % Used if \insertBetweenHyps isn't given

\usepackage[active]{srcltx}

\def\extended#1{}

\def\measure#1{\ensuremath{\|#1\|}}

\def\mempty{\emptyset}
\def\mappend{\odot}

\def\ident#1{\textit{#1}~}

\def\classid#1{\textsf{#1}}

\def\propname#1{\textbf{#1}}

\def\nil{[~\!]}
\def\app{\mathbin{+\!\!\!+}}
\def\tip{[\_]}
\def\treeapp{\mathbin{+\!\!:\!\!+}}

\def\lang{en}
\input{mathenv}
\input{typing-macros}
\input{typemacros}

\def\id{\coqdocid}
\def\ind{\coqdocind}
\def\constr{\coqdocconstr}
\def\kw{\coqdockw}
\def\class{\kw{class}~}
\def\where{\kw{where}~}
\def\module#1{\texttt{#1}}
\def\tactic#1{\texttt{#1}}

\newenvironment{coq}{%
   
  \medskip}{\medskip}

% \def\anon#1{Omitted for submission}
% \authorinfo{Anonymous}
% {}
% {}

\def\anon#1{#1}

\authorinfo{\myname}{\myaffiliation}{\mymail}


\def\mykeywords{Coq, Dependent Types, Finger Trees, Certification}

\usepackage{hyperref}
\hypersetup{
  pdftitle = Program-ing Finger Trees in Coq,
  pdfauthor = \anon{\myname},
  pdfsubject = Computer Science: Programming,
  pdfkeywords = \mykeywords  
 } 



\def\shortcite#1{\cite{#1}}
\def\programmath{}
\def\unprogrammath{}

\title{\Program-ing Finger Trees in \Coq}

\def\tikfingertree{
  \begin{tikzpicture}
    \tikzstyle{every circle node}=[circle,draw]
    \node[draw,fill=black!33] {\constr{Deep} $\measure{a} \cdots \measure{l}$}
    [level distance=1cm,sibling distance=3cm]
    child { node {\constr{Two}}
      [sibling distance=0.75cm]
      child { node[circle] { a } }
      child { node[circle] { b } }
    }
    child[level distance=2cm] {
      node[draw,fill=black!33] {\constr{Deep}
        $\measure{c} \cdots \measure{i}$}
      [level distance=1cm,sibling distance=1.5cm]
      child { node {\constr{Two}}
        [sibling distance=3cm]
        child { node[draw,fill=black!33] {\constr{Node2}
            $\measure{c} \cdot \measure{d}$}
          [sibling distance=0.75cm]
          child { node[circle] { c } }
          child { node[circle] { d } }}
        child { node[draw,fill=black!33] {\constr{Node3}
            $\measure{e} \cdot \measure{f} \cdot \measure{g}$}
          [sibling distance=0.75cm]
          child { node[circle] { e } }
          child { node[circle] { f } }
          child { node[circle] { g } }
        }
      }
      child { node {\constr{Empty}} }
      child[level distance=1cm,sibling distance=3cm] { node {\constr{One}}
        child { node[draw,fill=black!33] {\constr{Node2} 
            $\measure{h} \cdot \measure{i}$ } 
          [sibling distance=0.75cm]
          child { node[circle] { h } }
          child { node[circle] { i } }
        }
      }
    }
    child { node {\constr{Three}}
      [sibling distance=0.75cm]
      child { node[circle] { j } }
      child { node[circle] { k } }
      child { node[circle] { l } }
    };
  \end{tikzpicture} 
}

\def\fingertreeFig{
  \begin{center}
    \UAX{}
    {}%\id{A} : \Type, \id{measure} : $A "->" v$}
    {\constr{Empty} : \ind{fingertree} \id{A} \id{measure} $\varepsilon$}
    {}
    \DP

    \vspace{0.5em}
    \UAX{}
    %\id{A} : \Type, \id{measure} : $A "->" v$, 
    {\id{x} : \id{A}}
    {\constr{Single} \id{x} : \ind{fingertree} \id{A} \id{measure}
      (\id{measure} \id{x})}
    {}
    \DP

    \vspace{0.5em}
    %\id{A} : \Type, \id{measure} : $A "->" v$, 
    \AXC{\id{pr}, \id{sf} : \id{digit} \id{A}, \id{ms} : \id{v}}
    \noLine
    \UIC{\id{m} : \ind{fingertree} (\id{node} \id{measure} \id{A})
      (\id{node\_measure} \id{measure}) \id{ms}}
    \UIC{\constr{Deep} \id{pr} \id{ms} \id{m} \id{sf} : \ind{fingertree}
      \id{A} \id{measure}}
    \noLine
    \UIC{(\id{digit\_measure} \id{measure} \id{pr} $\cdot$ \id{ms} $\cdot$
      \id{digit\_measure} \id{measure} \id{sf})}
    \DP
  \end{center}
}

\def\deepLfig{
\label{def:deepL}
\coqdocindent{2.00em}
\coqdockw{Program Definition} \coqdocid{deep\_L} (\coqdocid{A} : \coqdockw{Type}) (\coqdocid{measure} : \coqdocid{A} \ensuremath{\rightarrow} \coqdocid{v}) \coqdoceol
\coqdocindent{3.00em}
(\coqdocid{d} : \coqdocind{option} (\coqdocind{digit} \coqdocid{A})) (\coqdocid{s} : \coqdocid{v}) \coqdoceol
\coqdocindent{3.00em}
(\coqdocid{mid} : \coqdocind{fingertree} (\coqdocid{node\_measure} \coqdocid{measure}) \coqdocid{s})\coqdoceol
\coqdocindent{3.00em}
(\coqdocid{sf} : \coqdocind{digit} \coqdocid{A}) : \coqdocind{fingertree} \coqdocid{measure} (\coqdocid{option\_digit\_measure} \coqdoceol
\coqdocindent{4.00em}
\coqdocid{measure} \coqdocid{d} $\cdot$ \coqdocid{s} $\cdot$
\coqdocid{digit\_measure} \coqdocid{measure} \coqdocid{sf}) $\coloneqq$ $\cdots$
}

\def\splitnodefig{
\label{def:splitnode}
\coqdocindent{3.00em}
\coqdockw{Program Definition} \coqdocid{split\_node} (\coqdocid{n} : \coqdocind{node} \coqdocid{measure}) : \coqdoceol
\coqdocindent{3.00em}
\{ (\coqdocid{l}, \coqdocid{x}, \coqdocid{r}) : \coqdocind{option} (\coqdocind{digit} \coqdocid{A}) \ensuremath{\times} \coqdocid{A} \ensuremath{\times} \coqdocind{option} (\coqdocind{digit} \coqdocid{A}) \coqor  \coqdoceol
\coqdocindent{4.00em}
\coqdockw{let} \coqdocid{odm} $\coloneqq$
\coqdocid{option\_digit\_measure} \coqdocid{measure} \coqdockw{in}\coqdoceol
\coqdocindent{4.00em}
\coqdocid{node\_measure} \coqdocid{measure} \coqdocid{n} = \id{odm}
\coqdocid{l} $\cdot$ $\coqdoublebar$ \coqdocid{x} $\coqdoublebar$
$\cdot$ \id{odm} \coqdocid{r} \ensuremath{\land}\coqdoceol
\coqdocindent{4.00em}
\coqdocid{node\_to\_list} \coqdocid{n} = \coqdocid{option\_digit\_to\_list} \coqdocid{l} $\app$ [x] $\app$ \coqdoceol
\coqdocindent{5.00em}
\coqdocid{option\_digit\_to\_list} \coqdocid{r} \ensuremath{\land}\coqdoceol
\coqdocindent{4.00em}
(\coqdocid{l} = \coqdocconstr{None} \ensuremath{\lor} \coqdocid{p} (\coqdocid{i} $\cdot$ \coqdocid{ls}) = \coqdocconstr{false}) \ensuremath{\land}\coqdoceol
\coqdocindent{4.00em}
(\coqdocid{r} = \coqdocconstr{None} \ensuremath{\lor} \coqdocid{p}
(\coqdocid{i} $\cdot$ \coqdocid{ls} $\cdot$ $\coqdoublebar$ \coqdocid{x}
$\coqdoublebar$) = \coqdocconstr{true})\} $\coloneqq \cdots$
}

\def\splittreefig{
\coqdockw{Program Fixpoint} \coqdocid{split\_tree} (\coqdocid{A} : \coqdockw{Type}) (\coqdocid{measure} : \coqdocid{A} \ensuremath{\rightarrow} \coqdocid{v})\coqdoceol
\coqdocindent{4.00em}
(\coqdocid{i} \coqdocid{s} : \coqdocid{v}) (\coqdocid{t} :
\coqdocind{fingertree} \coqdocid{measure} \coqdocid{s} \coqor  \ensuremath{\lnot} \coqdocid{isEmpty} \coqdocid{t})\coqdoceol
\coqdocindent{4.00em} : \coqdocind{tree\_split}
\coqdocid{measure} \coqdocid{i} \coqdocid{s} $\coloneqq \cdots$
}

\def\splittreeauxfig{
\coqdockw{Program Fixpoint} \coqdocid{split\_tree'} (\coqdocid{A} : \coqdockw{Type}) (\coqdocid{measure} : \coqdocid{A} \ensuremath{\rightarrow} \coqdocid{v})\coqdoceol
\coqdocindent{4.00em}
(\coqdocid{i} \coqdocid{s} : \coqdocid{v}) (\coqdocid{t} : \coqdocind{fingertree} \coqdocid{measure} \coqdocid{s}) (\coqdocid{pr} : \coqdocind{unit} \coqor  \ensuremath{\lnot} \coqdocid{isEmpty} \coqdocid{t})\coqdoceol
\coqdocindent{4.00em}
\{\coqdockw{struct} \coqdocid{t}\} : \coqdocind{tree\_split}
\coqdocid{measure} \coqdocid{i} \coqdocid{s} $\coloneqq \cdots$
}


\begin{document}
\conferenceinfo{ICFP'07,} {October 1--3, 2007, Freiburg, Germany.}
\CopyrightYear{2007}
\copyrightdata{978-1-59593-815-2/07/0010}

\maketitle

\begin{abstract}
Finger Trees \cite{hinze:FingerTrees} are a general purpose persistent
data structure with good performance. Their genericity permits developing
a wealth of structures like ordered sequences or interval trees
on top of a single implementation. However, the type systems
used by current functional languages do not guarantee the coherent
parameterization and specialization of Finger Trees, let alone the
correctness of their implementation.%  This problem of modularity can be overcomed
% in a system with dependent types and higher-rank polymorphism like
% \Coq.
We present a certified implementation of Finger Trees solving
these problems using the \Program extension of \Coq. 
We not only implement the structure but also prove its
invariants along the way, which permit building certified
structures on top of Finger Trees in an elegant way.
\end{abstract}

\category{D.2.4}{Software/Program Verification}{Correctness proofs,
  Formal methods}
\category{F.3.1}{Specifying and Verifying and Reasoning about
  Programs}{Mechanical verification, Specification techniques}

\terms
Algorithms, Languages, Verification

\keywords
\mykeywords

\section{Introduction}
Finger Trees are based on an optimization of 2-3 trees which gives
constant or in the worst case logarithmic amortized time to the usual sequence
operations, from adding an element at either end to splitting at an
arbitrary location. Suffice to say that it has been integrated as the
\module{Data.Sequence} implementation in {\sc \citet*{Haskell}} 6.6 to witness the fact
that it is a practical, useful data structure. Our first contribution will be to
show that the implementation given by Hinze \& Paterson is correct,
which means that all functions are terminating 
and that the invariants of Finger Trees are respected. 
In fact, as the invariants are part
of our data structure we will also carry the properties to the client
side, which lead us to our second contribution, a certified
implementation of random-access sequences built on top of Finger Trees.
This development
\footnote{\url{www.lri.fr/~sozeau/research/russell/fingertrees.en.html}}
was made possible using the \Program extension of {\sc \citet*{Coq}}. It is
based on a high-level programming language (\Russell) which permits
writing functional programs with rich
specifications and a translation to \Coq generating proof obligations.
This paper also serves as a gentle introduction to \Program-ing in \Coq.

The remaining of this paper is organized as follows: in sections
\ref{sec:Preliminaries} and \ref{sec:Russell} we give a short
introduction to \Coq and \Russell respectively, walking
through simple examples. Then we move on to the implementation of Finger
Trees, in section \ref{sec:FingerTrees}, their specialization in
section \ref{sec:Instances} and their extraction in section
\ref{sec:Extraction}.
We finally discuss our method and results in
section \ref{sec:Discussion}. No prior knowledge of the \Coq tool is assumed, but
practice of a typed functional language is recommended. Familiarity with
Finger Trees is not required either.

\paragraph{Notations}
The bulk of this paper consists of a set of literate \Coq files
processed by the \verb|coqdoc| tool.
\Coq code is typeset with the usual mathematical style;
variable and definition names are typeset in \id{italic}, inductive
types and constructor names are typeset in \constr{sans-serif} and language
keywords are typeset in \kw{typewriter} font.
% We use the telescope notation of de Bruijn $\tele{x_i : T_i} \eqdef x_0
% : T_0, \cdots, x_i : T_i$ where $T_{j+1}$ may
% refer to variables $x_0 \cdots x_j$.
% An index of all the definitions used in
% the paper with their type or a reference to their definition is given in the appendix.
% Typing judgements are used in an
% informal, simplified way as a mean to present an idea and should not be taken too seriously.

\section{Preliminaries}
\label{sec:Preliminaries}

In this section we present \Coq's underlying calculus and some features
of the tool we use in the rest of the paper. It can be safely skipped by
\Coq users.

\subsection{\CIC power}
\Coq is a proof assistant based on the Calculus of Inductive
Constructions (\CIC), a very powerful type system and logical language. It
relies on the Curry-Howard isomorphism between proofs and programs,
types and logical statements to provide an environment in which
programming and proving are seemingly intermingled. \CIC is
an extension of the Calculus of Constructions (\CC), the richest type system
of Barendregt's $\lambda$-cube, which integrates polymorphic, dependent
types with highly expressive (co-)inductive types. The environment built
around this kernel allows users to do interactive theorem proving of
mathematical developments and certified programming using the underlying
$\lambda$-calculus. It provides facilities to define
datatypes and functions like a regular functional programming language 
but it also gives the ability to write rich specifications and construct proofs 
in the same language viewed as a higher-order logic. 

We present the typing rules of \CC in figure \ref{fig:cc-rules} as a reminder. We will
extend them when defining \Russell's type system in section
\ref{sec:Russell}. The basic calculus is church-style $\lambda$-calculus plus a set of
sorts $\PTSsorts = \{ \Prop \} `U \{ \Type(i) `| i `: `N \}$ 
and the dependent product $\Pi x : A, B$ which
types abstractions (rules \irule{Prod}, \irule{Abs}, \irule{App}).
We do not detail the relation $\mathcal{R}$ which allows an
impredicative $\Prop$ sort and a predicative $\Type$ hierarchy.
The sort $\Type(0)$ represents computational types
(e.g. lists, naturals) and $\Prop$ logical propositions. Both sorts are
typed by $\Type(1)$ which is itself typed using the rule \irule{Type}.
An essential rule is \irule{Conv} which internalizes the
fact that $\beta$-convertible types can be considered equal during
type-checking ($`=$ is the $\beta$-equivalence relation). 
Here lies the true power of the system: computation can
be interleaved with reasoning anywhere. 
We do not treat the full kernel of \Coq, which also contain sort
subtyping and inductive datatypes as they are not essential to describe
the core features \Russell will add; inductives will be described next
by example.

\begin{figure}[tb]
  \begin{center}
    \def\seq{\typed}
    \def\fCenter{\wf}
    \WfEmpty\DP
    \WfVar\DP
    
    \def\fCenter{\typed}
    \vspace{\infvspace}
    \PropTypeone\DP 
    
    \vspace{\infvspace}
    \TypeType\DP
    
    \vspace{\infvspace}
    \Var\DP
    
    \vspace{\infvspace}
    \ProdType\DP
    
    \vspace{\infvspace}
    \Abs\DP

    \vspace{\infvspace}
    \App\DP

    \vspace{\infvspace}
    \Conv\DP
  \end{center}
  \caption{Calculus of Constructions}
  \label{fig:cc-rules}
\end{figure}

Important properties of this system are Subject Reduction and Strong
Normalization, i.e.: all well-typed terms reduce to a value in a finite number of
reductions, which ensures that typing is decidable in the \irule{Conv}
rule. With such expressive power and strong metatheoretical
properties comes verbosity and inflexibility. We will present in the
following sections what mechanisms help overcome these difficulties in
the \Coq tool. But first we will present the salient features of
inductives in \Coq.

\paragraph{Inductive types}
Inductive types are a generalization of the usual algebraic datatypes of 
\ML. For example we may define the standard polymorphic lists using
the declaration:

\begin{coq}
\coqdockw{Inductive} \coqdocind{list} (\coqdocid{A} : \coqdockw{Type}) : \coqdockw{Type} :=\coqdoceol
\coqor  \coqdocconstr{nil} : \coqdocind{list} \coqdocid{A}\coqdoceol
\coqor  \coqdocconstr{cons} : \coqdocid{A} \ensuremath{\to} \coqdocind{list} \coqdocid{A} \ensuremath{\to} \coqdocind{list} \coqdocid{A}.\coqdoceol
\end{coq}

Here \id{A} is an \emph{inductive parameter} representing the type of elements of
the list and each constructor is declared
with a product type from its arguments types to the inductive type. This
presentation as a product comes from the fact that constructor arguments may depend
on each other and that each constructor may target a different instance
of the family. For example we may declare vectors (lists of fixed length) as:

\begin{coq}
\coqdockw{Inductive} \coqdocind{vector} (\coqdocid{A} : \coqdockw{Type}) : \coqdocind{nat} \ensuremath{\to} \coqdockw{Type} :=\coqdoceol
\coqor  \coqdocconstr{vnil} : \coqdocind{vector} \coqdocid{A} 0\coqdoceol
\coqor  \coqdocconstr{vcons} : \coqdocid{A} \ensuremath{\to} \ensuremath{\forall} \coqdocid{n}, \coqdocind{vector} \coqdocid{A} \coqdocid{n} \ensuremath{\to} \coqdocind{vector} \coqdocid{A} (\coqdocconstr{S} \coqdocid{n}).\coqdoceol
\end{coq}

The inductives of \Coq \cite{paulin93tlca} are in fact
parameterized, possibly nested, mutually recursive inductive families. 
An inductive family is a family of types indexed by a type or a value.
Here for example, \ind{vector} is a
family parameterized by a type \id{A} of elements and indexed by
the naturals $\coqdocind{nat}$. The \constr{vnil} constructor is the
empty vector of length \constr{0} and the \constr{vcons} constructor
builds a \ind{vector} of length \constr{S} \id{n} from an object of type \id{A} and a
\ind{vector} of length \id{n}.

Finally, we can define nested inductive types using non-uniform parameters;
this feature is needed to be able to define Finger Trees. A
classical example of a nested datatype is the power list of \id{A} where the tail of
a list is a list of pairs of \id{A}. The important point is that we can
reinstantiate the parameter \id{A} in the constructors.

\begin{coq}
\coqdockw{Inductive} \coqdocind{pow\_list} (\coqdocid{A} : \coqdockw{Type}) : \coqdockw{Type} :=\coqdoceol
\coqor  \coqdocconstr{pow\_nil} : \coqdocind{pow\_list} \coqdocid{A}\coqdoceol
\coqor  \coqdocconstr{pow\_cons} : \coqdocid{A} \ensuremath{\to} \coqdocind{pow\_list} (\coqdocid{A} \ensuremath{\times} \coqdocid{A}) \ensuremath{\to} \coqdocind{pow\_list} \coqdocid{A}.\coqdoceol
\end{coq}

We can of course recurse on elements of these inductive types and in the
last case we will even use polymorphic recursion, a feature the type
system of \ML lacks. For example the \id{map} combinator on power lists can be
defined as in figure \ref{fig:powmap}.

\begin{figure}
\noindent
\coqdockw{Fixpoint} \coqdocid{pow\_map} (\coqdocid{A} \coqdocid{B} : \coqdockw{Type}) (\coqdocid{f} : \coqdocid{A} \ensuremath{\to} \coqdocid{B}) \coqdoceol
\coqdocindent{1.00em}
(\coqdocid{l} : \coqdocind{pow\_list} \coqdocid{A}) \{ \coqdockw{struct} \coqdocid{l} \} : \coqdocind{pow\_list} \coqdocid{B} := \coqdoceol
\coqdocindent{1.00em}
\coqdockw{match} \coqdocid{l} \coqdockw{with}\coqdoceol
\coqdocindent{2.00em}
\coqor  \coqdocconstr{pow\_nil} \ensuremath{\Rightarrow} \coqdocconstr{pow\_nil} \coqdocid{B}\coqdoceol
\coqdocindent{2.00em}
\coqor  \coqdocconstr{pow\_cons} \coqdocid{hd} \coqdocid{tl} \ensuremath{\Rightarrow} \coqdocconstr{pow\_cons} \coqdocid{B} (\coqdocid{f} \coqdocid{hd}) \coqdoceol
\coqdocindent{3.00em}
(\coqdocid{pow\_map} (\coqdocid{A} \ensuremath{\times} \coqdocid{A}) (\coqdocid{B} \ensuremath{\times} \coqdocid{B}) \coqdoceol
\coqdocindent{4.00em}
(\coqdockw{fun} \coqdocid{x} : \coqdocid{A} \ensuremath{\times} \coqdocid{A} \ensuremath{\Rightarrow} (\coqdocid{f} (\coqdocid{fst} \coqdocid{x}), \coqdocid{f} (\coqdocid{snd} \coqdocid{x}))) \coqdocid{tl})\coqdoceol
\coqdocindent{1.00em}
\coqdockw{end}.\coqdoceol
\caption{\id{pow\_map} definition}
\label{fig:powmap}
\end{figure}

To ensure the aforementioned properties are preserved when adding
inductive families, the kernel implements a relatively
inflexible syntactic criterion to discard
non-structurally recursive definitions. In the \id{pow\_map} example, it
checks that \id{tl} is indeed a subterm of \id{l} which the user 
marked as the structurally decreasing argument using the $\{ \kw{struct}~id
\}$ annotation.
In this definition we also face the foremost difficulty in using \CIC as a
programming language: verbosity. Clearly, writing all these types is
not viable from a software development point of view, so we must find a way
to overcome this problem while still retaining decidability of inference
and type-checking. \Coq's solution to this problem is to use a
higher-level language \Gallina which elaborates/compiles into \CIC. This
approach does not change the kernel's language \emph{at all}, so we keep
all the nice properties once our definitions have compiled into it.

\subsection{\Coq surface: \Gallina}
We will now present a few of \Coq's surface language features 
which deal with the verbosity and abstraction power of \CIC: 
implicit arguments to omit some type information,
notations to abbreviate and beautify the otherwise obfuscated
definitions and sections to easily parameterize whole developments.

\subsubsection{Recovering inference} 
Implicit arguments systems permit writing terms omitting as much
inferable information as possible. Its effect is best described
by an example application. Suppose we want to apply the \id{pow\_map}
function to a function $f : A "->" B$ and a power list $l : \ind{pow\_list}~A$. Without implicit
arguments, we must give the type of $f$ as the first arguments of the
function, like this: $\id{pow\_map}~A~B~f~l$. In implicit arguments mode, 
the first two arguments become implicit as the system knows that
they will be inferable at application time from the \emph{type} of the
functional argument $f$, hence we can write $\id{pow\_map}~f~l$ to achieve the same
effect. This permits users to write definitions
with a syntax very similar to languages based on Hindley-Milner type
inference, e.g. the \id{pow\_map} definition comes down to the code in
figure \ref{fig:powmapimpl}. We do not consider partial
applications here for which this mechanism is incomplete.

\begin{figure}[h]
\noindent
\coqdockw{Fixpoint} \coqdocid{pow\_map} (\coqdocid{A} \coqdocid{B} : \coqdockw{Type}) (\coqdocid{f} : \coqdocid{A} \ensuremath{\to} \coqdocid{B}) \coqdoceol
\coqdocindent{1.00em}
(\coqdocid{l} : \coqdocind{pow\_list} \coqdocid{A}) \{ \coqdockw{struct} \coqdocid{l} \} : \coqdocind{pow\_list} \coqdocid{B} := \coqdoceol
\coqdocindent{1.00em}
\coqdockw{match} \coqdocid{l} \coqdockw{with}\coqdoceol
\coqdocindent{2.00em}
\coqor  \coqdocconstr{pow\_nil} \ensuremath{\Rightarrow} \coqdocconstr{pow\_nil}\coqdoceol
\coqdocindent{2.00em}
\coqor  \coqdocconstr{pow\_cons} \coqdocid{hd} \coqdocid{tl} \ensuremath{\Rightarrow} \coqdocconstr{pow\_cons} (\coqdocid{f} \coqdocid{hd}) \coqdoceol
\coqdocindent{3.00em}
(\coqdocid{pow\_map} (\coqdockw{fun} \coqdocid{x} \ensuremath{\Rightarrow} (\coqdocid{f} (\coqdocid{fst} \coqdocid{x}), \coqdocid{f} (\coqdocid{snd} \coqdocid{x}))) \coqdocid{tl})\coqdoceol
\coqdocindent{1.00em}
\coqdockw{end}.\coqdoceol
\caption{\id{pow\_map} definition with implicit arguments} 
\label{fig:powmapimpl}
\end{figure}

\subsubsection{Notations: pretty parsing/printing}
Notations are kind of hygienic macros which serve to abbreviate or
beautify specifications and generally give a more mathematical feel to
writing \Coq.
For example we can define the type representing the subset of elements
of a type $A$ having a particular property $P$ as in figure \ref{fig:sig}. 

\begin{figure}[ht]
\coqdockw{Inductive} \coqdocind{sig} (\coqdocid{A} : \coqdockw{Type}) (\coqdocid{P} : \coqdocid{A} \ensuremath{\to} \coqdockw{Prop}) : \coqdockw{Type} :=\coqdoceol
\coqor  \coqdocconstr{exist} : \ensuremath{\forall} \coqdocid{a} :
\coqdocid{A}, \coqdocid{P} \coqdocid{a} \ensuremath{\to}
\coqdocind{sig} \coqdocid{A} \coqdocid{P}.\coqdoceol
\coqdockw{Notation} "\{ \coqdocid{x} : \coqdocid{A} \coqor \id{P} \}" :=
(\ind{sig} \id{A} (\kw{fun} \id{x} : \id{A} \ensuremath{\Rightarrow} \id{P}))
\caption{Subset type in Coq}
\label{fig:sig}
\end{figure}

An element of the type \coqdocind{sig} \coqdocid{A} \coqdocid{P} is a
tuple $\constr{exist}~A~P~x~p$ where $x$ is a witness of type $A$ and $p$ is of type
$P~a$, i.e. $p$ is a proof that the predicate $P$ holds for $a$. 
However it is much clearer to write this type as $\{ x : A `| P \}$
rather than $\ind{sig}~A~P$ and this is what the
notation system permits.

An alternative encoding of the \ind{vector}
inductive structure may be the lists of fixed lengths which we can
define using this notation (the types of $A$ and $n$ can be infered
automatically in this case):
\begin{coq}
\coqdockw{Definition} \coqdocind{vector} \coqdocid{A} \coqdocid{n} := \{ \coqdocid{x} : \coqdocind{list} \coqdocid{A} \coqor  \coqdocid{length} \coqdocid{x} = \coqdocid{n} \}.\coqdoceol
\end{coq}
One can define arbitrary notations, possibly for binding constructs,
and assign them priorities (levels) and associativities. 
For example we may define composition using the symbol $\circ$ as a notation:
\begin{coq}
\coqdockw{Notation} "\coqdocid{g} '$\circ$' \coqdocid{f}" :=
(\coqdockw{fun} \coqdocid{x} \ensuremath{\Rightarrow} \coqdocid{g}
(\coqdocid{f} \coqdocid{x})) \coqdoceol 
\coqdocindent{3.00em}(\coqdocid{at} \coqdocid{level} 20, \coqdocid{left} \coqdocid{associativity}).\coqdoceol
\end{coq}

Armed with these tools we can start our first development.

\subsubsection{Sections: implementing the monoid typeclass}
As a gentle introduction to \Coq, we will build a monoid datatype 
corresponding to the \classid{Monoid} typeclass of \Haskell. We recall its signature:
\programmath
\[
\class \classid{Monoid}~m~\where \\
\begin{array}{lcl}
  \mempty & :: & m \\
  (\mappend) & :: & m "->" m "->" m
\end{array}
\]
\unprogrammath
Instances should make sure the following properties hold:
\begin{center}
  $\begin{array}{ll}
    \propname{left identity} & \mempty \mappend x = x \\
    \propname{right identity} & x \mappend \mempty = x \\
    \propname{associativity} & x \mappend (y \mappend z) = (x \mappend y) \mappend z 
  \end{array}$
\end{center}
\input{MonoidExample}

We will come back to monoids when implementing Finger Trees. We will now
present the \Russell language which was the essential tool we needed to develop them.

\section{\Russell in \Coq}
\label{sec:Russell}
The lack of distinction between proofs and programs in Type Theory helps having a clear,
unified semantics for the complete system but hinders users and
implementers alike, because sometimes you do not want to have proofs
polluting computations (e.g. when evaluating a term, the way proofs are
constructed should not matter, only the algorithm) and you certainly do
not want to write complex programs using a procedural language with
automation like the language used in proof scripts of \Coq but directly inputing
terms and have them interpreted in \Coq's kernel language.
A solution to the first problem is given by the $\Prop/\Type$ distinction
in \Coq. If $\Prop$ is used as the type of propositions (e.g.
conjunction, negation) and $\Type$ as the type of computational types
(e.g. naturals, lists) then we can immediately see whether something is
a proof or a program just by looking at its sort.
%  (this is not true when
% we have cumulativity and $\Prop ``( \Type$ but that is another problem).
This is used by the extraction mechanism \cite{DBLP:conf/types/Letouzey02} 
to extract \emph{only} the algorithmical code from
a \Coq term. However, the extraction is not used directly inside the
proof-checker, so computations in \Coq may freeze because of
irreducible proofs in the term, even when they are irrelevant; we will
discuss this later issue in section \ref{sec:Discussion}. For now we
will concentrate on the problem of writing richly specified programs in
\Coq by taking advantage of this distinction between \Prop~and \Type.


Usually, as soon as you have propositions appearing
in the type of a function, you will write its body in \Coq using tactics
instead of using the functional language because otherwise you would
have to directly manipulate proof terms. This is a showstopper because
it becomes intractable as soon as you have a complex proof to do,
especially since there is is not so much support for incremental refinement 
in \Coq, unlike in \Agda \cite{DBLP:conf/tphol/Coquand06} or \Epigram
\cite{DBLP:journals/jfp/McBrideM04}, although the \tactic{refine} \emph{tactic}
can give the same flavor of program development \cite{PositionPLPV06}.

\Russell is a programming language built on top of \Coq which permits writing
only the algorithmical code of strongly specified functions and forgetting about the
required proofs which \Coq needs to establish correctness
(i.e. typecheck the term).
Its main purpose is to act as a convenient, permissive
source language which elaborates into \CIC, which can be type-checked
by the safe kernel of \Coq. Here, permissive means the user is not
restricted to using structural recursion, complete
pattern-matching and total functions only, as we have seen up to now. 
Obviously, you have to give evidence that
your source term can be seen as a legitimate \Coq term, which generally
means that what you left out can be proved. This is done by solving obligations which
are generated \emph{after} the type-checking procedure of \Russell terms
by an interpretation into \CIC. For a
detailed presentation of this procedure, the reader is directed to
\cite{sozeau:Coq/Russell/article}. The generation of obligations is done
in a similar way as in \PVS \cite{PVS-Semantics:TR}, using subsets to
carry propositions. We will describe this essential feature of the
\Russell type system and its extension to the richer \CIC calculus next,
then move on gradually to our motivating example: Finger Trees.

\subsection{Subset equivalence}
Subset equivalence is a simple idea: two subsets 
over the same carrier may be considered equal, given a proof that
they contain the same elements. We can leave the properties out and just consider
the carrier of subsets. That is, if we have an object
$t$ of type $T$ where we expect an object of type $\{ x : T `| P \}$, we
may say ``ok, you will have to give me a proof that $P[t/x]$ holds but
we are good for now''. Conversely, considering an object of type
$\{ x : T `| P \}$ as an object to type $T$ is alright. This
is the essential idea behind the type system of \Russell: it checks
type equivalence on the carriers only, ignoring if the properties really
hold. We need only enrich the type conversion relation of \CIC with the
rule given figure \ref{fig:russell:subequiv} to formalize this idea.

\begin{figure}[h] 
  \begin{center}
    \AXC{$`G \vdash T `= U : \Type$}
    \AXC{$`G, x : U \vdash P : \Prop $}
    \BIC{$`G \vdash T `= \{ x : U `| P \} : \Type$}
    \DP
%     \vspace{0.5em}
%     \AXC{$`G \vdash T `= U : \Type$}
%     \AXC{$`G, x : T \vdash P : \Prop$}
%     \BIC{$`G \vdash \{ x : T `| P \} `= U : \Type$}
%     \DP
  \end{center}
  \caption{Subset equivalence rule}
  \label{fig:russell:subequiv}
\end{figure}

As an example, we can derive: 
\begin{prooftree}
  \AXC{$`G \type 0 : `N$}
  \AXC{$`G \type `N `= `N : \Type$}
  \AXC{$`G, x : `N \type x = 0 : \Prop$}
  \BIC{$`G \type `N~`= \mysubset{x}{`N}{x = 0} : \Type$}
  \BIC{$`G \type 0 : \mysubset{x}{`N}{x = 0}$}
\end{prooftree}

After a term has been typed in this system, we can automatically
generate the obligations and produce a full-fledged, well-typed \Coq
term once these are solved. Following the example, a single obligation
demanding that $0 = 0$ be proved will be generated which will easily be
solved by reflexivity of equality. But we could have derived 
$`G \type 0 : \mysubset{x}{`N}{x \neq 0}$ in \Russell as well, in which case the
generated obligation could certainly not be solved except if $`G$ is
inconsistent. This phase distinction is essential: as soon
as we want to do any proof we jump into an undecidable world because we
have the full higher-order logic of \Coq at hand, so we must not have
any proving to do if we want our type-checking to be decidable. This is
to contrast with the approaches of other dependently-typed programming
languages, from \DML \cite{XiPfenning1999DTP} to \Epigram. In these languages either the expressive power is
limited so that an automatic prover can solve obligations during
type-checking (e.g. Pressburger arithmetic in \DML) or there is no limit
but proofs and code are intermingled in the same language (for \Epigram,
\Coq and \Agda) or a mix of the two styles in \ATS \cite{Xi:ATStypes03} or
\Omegapdx \cite{DBLP:journals/sigplan/Sheard04} for example.

\subsection{\Russell's incarnation: \Program}
Using \Russell we can write arbitrarily complex code with arbitrarily
complex specifications, it will typecheck only if it is ``structurally''
well-typed. We do not care about proofs at the type-checking step but
later, when recombining obligations
with the algorithmical skeleton we wrote. Let us define the strongly
specified euclidean division of $a$ by $b$ where $b$ must be non-zero.
This function is defined by well-founded recursion using the
less-than order on the argument $a$:

\def\coqdocindent#1{\noindent\kern#1\hskip1.00em}
\begin{coq}
\coqdocindent{0.00em}
\coqdockw{Program Fixpoint} \coqdocid{div} (\coqdocid{a} : \coqdocind{nat}) (\coqdocid{b} : \coqdocind{nat} \coqor  \coqdocid{b} \ensuremath{\not=} \constr{0}) \{ \coqdocid{wf} \coqdocid{lt} \} :\coqdoceol
\coqdocindent{1.00em}
\{ (\coqdocid{q}, \coqdocid{r}) : \coqdocind{nat} \ensuremath{\times} \coqdocind{nat}
\coqor \coqdocid{a} = \coqdocid{b} \ensuremath{\times}
\coqdocid{q} + \coqdocid{r} \ensuremath{\land} \coqdocid{r} $<$ \coqdocid{b} \} :=\coqdoceol
\coqdocindent{1.00em}
\coqdockw{if} \coqdocid{less\_than} \coqdocid{a} \coqdocid{b} \coqdockw{then} (\coqdocconstr{0}, \coqdocid{a})\coqdoceol
\coqdocindent{1.00em}
\coqdockw{else} \coqdockw{dest} \coqdocid{div} (\coqdocid{a} - \coqdocid{b}) \coqdocid{b} \coqdockw{as} (\coqdocid{q'}, \coqdocid{r}) \coqdockw{in} (\coqdocconstr{S} \coqdocid{q'}, \coqdocid{r}).\coqdoceol
\end{coq}

\noindent The \kw{Program} vernacular indicates that we use the \Russell
type-checker followed by the interpretation into \Coq instead of \Coq's
original type-checker.  The construct \kw{dest} \id{t} \kw{as} \id{p} \kw{in} \id{b} is
equivalent to a destructive \kw{let} construct where $p$ is an arbitrary
pattern (allowing one to pull values from nested tuples for example) but
it also reflects the equality between \id{t} and \id{p} in the context
for later use in the obligations.
We also use sugar for the binder $(x : \ind{nat} `| P) "=" (x : \{ x :
\ind{nat} `| P \})$.
Finally, the \kw{if} construct works on any inductive type with two constructors. 


This code type-checks and \Program generates obligations which are discharged
automatically by \Coq's tactics except the one for the inductive case.
We can discharge it interactively using \Coq's proof language. When all
obligations are solved the \Program system adds the completed definition of $\id{div}$
in the environment. We can then use $\id{div}$ like any other \Coq
object, reasoning and computing with it as desired. 


We were able to discharge the obligations because hypotheses $a
``< b$ or $a ``>= b$ were present in their contexts. However, these
hypotheses are not automatically generated by \Coq. They come directly
from the term which uses a dependent function \id{less\_than} of type: 
$`A x\,y, \{ x ``< y \} + \{ x ``>= y \}$. Had we simply used a boolean
function instead, we would have no information about the comparison in
the obligations. Hence we must always take care of \emph{reflecting} the
logic in our code if we want to reason on it later. In case of boolean
conditionals, we can always use the combinator $\id{dec} : `A b, \{ b =
\constr{true} \} + \{ b = \constr{false} \}$ to achieve this
reflection.

\subsection{Extension to inductive types}
It is natural to extend the support for subset types in \Russell to 
inductive families. For example, we have seen we can have two encodings 
of vectors, one using subsets and another using an inductive family.
The facility we added for
handling subset types should then transpose easily to inductive types.
Indeed we can obtain the \Russell derivation:

% {\scriptsize \begin{prooftree}
%   \AXC{$`G \type x : \mysubset{x}{\id{list}~A}{\id{length} = m}$}
%   \AXC{$`G \type \id{list}~A `= \id{list}~A : \Type$}
%   \AXC{$\cdots$}
%   \BIC{$`G \type \mysubset{x}{\id{list}~A}{\id{length} = m} `= \mysubset{x}{\id{list}~A}{\id{length} = n} : \Type$}
%   \BIC{$`G \type x : \mysubset{x}{\id{list}~A}{\id{length} = n}$}
% \end{prooftree}}

% and
\begin{prooftree}
  \AXC{$`G \type x : \ind{vector}~A~m$}
  %\AXC{$\vspace{-1em}`G \type \id{vector}~A `= \id{vector}~A : \id{nat}
  % "->" \Type$}
  \AXC{$`G \type \ind{vector}~A~m `= \ind{vector}~A~n : \Type$}
  \BIC{$`G \type x : \ind{vector}~A~n$}
\end{prooftree}

The type conversion results from an instantiation of the rule given figure
\ref{fig:russell:indequiv} which states
that an inhabitant of an inductive family $I$ with indexes $\tele{x_i}$
also inhabits the type $I~\tele{y_i}$ (coercion does not happen on parameters).

\begin{figure}[h]
  \begin{center}
%    \AXC{$`G \vdash t : I~\tele{x_i}$}
    % \AXC{$`G, {\tele{(x_i : X_i `~ y_i : Y_i)}}_{0}^{j-1} \vdash x_j :
%       X_j `~ y_j : Y_j$}
%     \RightLabel{$j `: [0..i]$}
    \AXC{$I~\mathbf{inductive~family}$}
    \AXC{$`G \vdash I~\tele{x_i}, I~\tele{y_i} : \Type$}
    \BIC{$`G \vdash I~\tele{x_i} `= I~\tele{y_i} : \Type$}
    \DP    
    \caption{Dependent inductive equivalence rule}
    \label{fig:russell:indequiv}
  \end{center}
\end{figure}

When interpreting into \Coq, proof obligations will be generated to show 
that indices are equal. For the example above, a proof of $`G \vdash m =
n : \Prop$ will be required. This extension was crucial in the development of
Dependent Finger Trees where a good part of the obligations are
generated due to applications of this rule. 

Dually to this construction used for building an inductive object with
arbitrary index, we have enriched the pattern-matching construct to get
information about destructed values and their indexes
(i.e. automatically reflecting pattern-matching at the logical level).
Informally, in our setting each branch of a
pattern-matching construct will be typed in a context extended with equalities between the
patterns and the matched values up to their indices. For example, in a
context where \id{v} is of type $\ind{vector}~A~(\constr{S}~n)$, when
typing the term $\kw{match}~v~\kw{with}~\constr{vnil}~\Rightarrow~\cdots~|~\constr{vcons}~x~n'~v'~\Rightarrow~\cdots~\kw{end}$
the first branch will be typed in a context containing hypotheses $\constr{S}~n = \constr{0}$ and 
% matching a term $v$ of type $\ind{vector}~A~(\constr{S}~n)$ with two branches having patterns $\constr{vnil}$
% and $\constr{vcons}~x~n'~v'$, the first branch will be typed in a
% context containing hypotheses $\constr{S}~n = \constr{0}$ and 
$v "~=" \constr{vnil}$, hence an absurd case. The equality symbol $"~="$ 
will be presented in \S~\ref{sec:DependenceHell}.
We will also be able to deduce that $n = n'$ in the second branch
because we will have an equality $\constr{S}~n = \constr{S}~n'$ in the
context (and constructors are injective).
As a further refinement of pattern-matching, we add inequality
hypotheses in the context of branches for which the pattern was
intersecting with a previous one. Consider the following code:

\begin{coq}
\coqdocindent{0.00em}
\coqdockw{Program Fixpoint} \coqdocid{div2} (\coqdocid{n} : \coqdocind{nat}) : \coqdoceol
\coqdocindent{1.00em}
\{ \coqdocid{n'} : \coqdocind{nat} \coqor  \coqdocid{n} = 2 \ensuremath{\times} \coqdocid{n'} \ensuremath{\lor} \coqdocid{n} = 2 \ensuremath{\times} \coqdocid{n'} + 1 \} :=\coqdoceol
\coqdocindent{1.00em}
\coqdockw{match} \coqdocid{n} \coqdockw{with}\coqdoceol
\coqdocindent{2.00em}
\coqor  \coqdocconstr{S} (\coqdocconstr{S} \coqdocid{p}) \ensuremath{\Rightarrow} \coqdocconstr{S} (\coqdocid{div2} \coqdocid{p})\coqdoceol
\coqdocindent{2.00em}
\coqor  \coqdocid{x} \ensuremath{\Rightarrow} 0\coqdoceol
\coqdocindent{1.00em}
\coqdockw{end}.
\end{coq}

The second branch is typed in a context extended with the hypotheses $x =
n$ and $`A p, x \neq \constr{S}~(\constr{S}~p)$, which is crucial to solve the
obligation $n = \constr{2} * \constr{0} `V n = \constr{2} * \constr{0} + \constr{1}$.

\paragraph{A conclusion on \Russell}
We think having an unrestricted power of expression in the logic is essential
to having useful software development and certification
tools and our method achieves it without losing the facilities
we are accustomed to in a practical programming language. So we get
decidability of type-checking and our programs look exactly like their
simply-typed counterparts with no proof handling code
appearing in the term but we also have a highly expressive logic and a
complete system to prove and check statements in this logic. 
% This combination
% gives us a certified programming language with unmatched support for
% proving, as we will demonstrate now.

\section{Dependent Finger Trees}
\label{sec:FingerTrees}
We will now illustrate \Russell's expressiveness by presenting a certified implementation of
Finger Trees in \Coq. Finger Trees \cite{hinze:FingerTrees} are a
functional, general purpose, efficient data structure based on an
optimization of 2-3 trees. It is implemented in \Haskell as a \emph{nested} datatype
which is parameterized by a type of \emph{measures}, which can be
instantiated by various types to give specializations of the Finger
Trees. The data structure builds upon simpler structures like digits and
2-3 nodes which we will present first.

\subsection{Digits}

A digit is simply a buffer of one to four values. 

\def\coqdocindent#1{\noindent\kern#1}
\medskip
\noindent
\coqdockw{Section} \coqdocid{Digit}.\coqdoceol
\coqdocindent{1.00em}
\coqdockw{Variable} \coqdocid{A} : \coqdockw{Type}.\coqdoceol
\coqdocindent{1.00em}
\coqdockw{Inductive} \coqdocind{digit} : \coqdockw{Type} := \coqdoceol
\coqdocindent{1.00em}
\coqor  \coqdocconstr{One} : \coqdocid{A} \ensuremath{\to} \coqdocind{digit}\coqdoceol
\coqdocindent{1.00em}
\coqor  \coqdocconstr{Two} : \coqdocid{A} \ensuremath{\to} \coqdocid{A} \ensuremath{\to} \coqdocind{digit}\coqdoceol
\coqdocindent{1.00em}
\coqor  \coqdocconstr{Three} : \coqdocid{A} \ensuremath{\to} \coqdocid{A} \ensuremath{\to} \coqdocid{A} \ensuremath{\to} \coqdocind{digit}\coqdoceol
\coqdocindent{1.00em}
\coqor  \coqdocconstr{Four} : \coqdocid{A} \ensuremath{\to} \coqdocid{A} \ensuremath{\to} \coqdocid{A} \ensuremath{\to} \coqdocid{A} \ensuremath{\to} \coqdocind{digit}.\coqdoceol
\medskip

\noindent
We build simple functional predicates on digits for use in
specifications. 
\extended{\ind{True} and \ind{False} are both an inductive types
living in \Prop. The former has a single constructor with no arguments and the later none.
They should not be confused with the booleans \constr{true} and \constr{false} which are
the two constructors of the \ind{bool} inductive type living in \Type.}

\medskip
\label{def:full}
\coqdocindent{1.00em}
\coqdockw{Definition} \coqdocid{full} (\coqdocid{x} : \coqdocind{digit}) : \coqdockw{Prop} := \coqdoceol
\coqdocindent{2.00em}
\coqdockw{match} \coqdocid{x} \coqdockw{with} \coqdocconstr{Four} \coqdocid{\_} \coqdocid{\_} \coqdocid{\_} \coqdocid{\_} \ensuremath{\Rightarrow} \coqdocconstr{True} \coqor  \coqdocid{\_} \ensuremath{\Rightarrow} \coqdocconstr{False}  \coqdockw{end}.\coqdoceol
\medskip

\noindent
We now define addition of an element to the left of a digit. 

\medskip
\label{def:adddigitleft}
\coqdocindent{1.00em}
\coqdockw{Program Definition} \coqdocid{add\_digit\_left} (\coqdocid{a} : \coqdocid{A})\coqdoceol
\coqdocindent{2.00em}
(\coqdocid{d} : \coqdocind{digit} \coqor  \ensuremath{\lnot} \coqdocid{full} \coqdocid{d}) : \coqdocind{digit} :=\coqdoceol
\coqdocindent{2.00em}
\coqdockw{match} \coqdocid{d} \coqdockw{with}\coqdoceol
\coqdocindent{3.00em}
\coqor  \coqdocconstr{One} \coqdocid{x} \ensuremath{\Rightarrow} \coqdocconstr{Two} \coqdocid{a} \coqdocid{x}\coqdoceol
\coqdocindent{3.00em}
\coqor  \coqdocconstr{Two} \coqdocid{x} \coqdocid{y} \ensuremath{\Rightarrow} \coqdocconstr{Three} \coqdocid{a} \coqdocid{x} \coqdocid{y}\coqdoceol
\coqdocindent{3.00em}
\coqor  \coqdocconstr{Three} \coqdocid{x} \coqdocid{y} \coqdocid{z} \ensuremath{\Rightarrow} \coqdocconstr{Four} \coqdocid{a} \coqdocid{x} \coqdocid{y} \coqdocid{z}\coqdoceol
\coqdocindent{3.00em}
\coqor  \coqdocconstr{Four} \coqdocid{\_} \coqdocid{\_} \coqdocid{\_} \coqdocid{\_} \ensuremath{\Rightarrow} !\coqdoceol
\coqdocindent{2.00em}
\coqdockw{end}.\coqdoceol
\medskip

\noindent
It has become interesting here, as we define a \emph{partial} function. 
We can add to a digit if and only if it is not already full. 
So, we require the argument digit to be accompanied by a proof that it is not full.
We will use it to prove that the last branch is inaccessible.
Note that we can pattern-match on 
\coqdocid{d} as if it was just a digit: properties have no influence on code, only on proofs.

As in \Coq, there has to be a branch for each and every
constructor in \Russell pattern-matchings, but we can use \verb|!| to assert that a branch
is inaccessible due to contextual information on the matched object.
We must treat every constructor because exhaustiveness of pattern
matching with dependent types is undecidable in general.
The generated obligation (figure \ref{fig:obligation}) is easily solved, as we have a contradiction in the context: 
both \ensuremath{\lnot} \coqdocid{full} \coqdocid{d} and \coqdocid{d} = \coqdocconstr{Four} \coqdocid{\_} \coqdocid{\_} \coqdocid{\_} \coqdocid{\_} are present.
We can define in a similar fashion addition on the right and the various 
accessors on digits (\coqdocid{head}, \coqdocid{tail}, \coqdocid{last} and \coqdocid{liat}). 
From now on we will omit the obligations and the proof
scripts used to solve them; they can be found in the \Coq contribution.
   
\begin{figure}[t]
  \[\begin{array}{lcl}
    \id{a} & : & \id{A} \\
    \id{d} & : & \ind{digit} \\
    \id{Hd} & : & \ensuremath{\lnot}~\coqdocid{full}~\coqdocid{d} \\
    \id{x}, \id{y}, \id{z}, \id{w} & : & \id{A} \\
    \id{Heq\_d} & : & d = \constr{Four}~\id{x}~\id{y}~\id{z}~\id{w} \\
    \hline
    \constr{False} & &
  \end{array}\]  
  \caption{Obligation of \id{add\_digit\_left}}
  \label{fig:obligation}
\end{figure}

As an other example of the usefulness of \Program, consider defining the monoid
$(\nil, \app)$ on lists. To construct a monoid implementation (c.f. \S\ref{def:monoid}), we have
to apply the \coqdocconstr{mkMonoid}
constructor to $\nil$, $\app$ and proofs of identity and associativity. Fortunately, those can be filled
automatically by \Coq using lemmas proved in the standard library.
When \Program processes the following definition it turns the holes
(\coqdocid{\_}) in the term into obligations which are thus
automatically discharged. We explicitly give the element and operation 
using the syntax (\coqdocid{x}:=\id{t}) ; 
they are considered implicit arguments by the system as they are derivable from
the types of the proofs.

\begin{coq}
\noindent
\coqdockw{Program Definition} \coqdocid{listMonoid} (\coqdocid{A} : \coqdockw{Type}) : \coqdocind{monoid} (\coqdocind{list} \coqdocid{A}) := \coqdoceol
\coqdocindent{1.00em}
\coqdocconstr{mkMonoid} (\id{mempty}:=\coqdocconstr{nil}) (\id{mappend}:=\coqdocid{app}) \coqdocid{\_} \coqdocid{\_} \coqdocid{\_}.\coqdoceol
\end{coq}
\input{DependentFingerTree}
     
The complete file we walked through comprises 600 lines of specifications (declarations, types, programs,
proof statements) and 450 lines of proof. In comparison the source file provided by 
Hinze \& Paterson is 650 lines long including comments. We argue that this is proof of the scalability of \Russell
as a programming language.

\section{Instances}
\label{sec:Instances}
We will now instantiate the \ind{fingertree} implementation by particular
monoids and measures to get higher-level structures.
Obviously, we can instantiate the Finger Trees just like in the original
paper of Hinze \& Paterson using weak, simply typed specifications and extract the code, but that would not be too
exciting. Nevertheless, we will first present a way to recover the
simply-typed interface and instantiate it to ordered sequences. We will
then focus on proving a particular specialization using the dependent interface directly.

\input{FingerTree}
\def\coqdocindent#1{\noindent\kern#1}
\input{OrdSequence}
\input{DependentSequence}

\section{Extraction}
\label{sec:Extraction}
We can extract the code we just certified to both \Haskell and {\sc \citet*{Ocaml}}
(bypassing the type checker for polymorphic recursion)
to get certified implementations of Finger Trees and sequences.
We are guaranteed to get the same algorithmic code than what we wrote using \Program.
This is an important aspect of developing with \Russell, because usually when
developing programs with rich specifications in \Coq one tends to use the proof
language as much as the programming language and this can have
devastating results on the extracted code. Our method enforces a
distinction between code and proofs that is healthy in this respect.

Unsurprisingly, our extracted Finger Trees have indeed the same performance
as the original implementation: they have the same code. The \module{Data.Sequence}
implementation in \Haskell is in fact a specialization of the \ind{FingerTree}
implementation to a particular monoid and measure, avoiding dynamic
lookups and unnecessary boxing. Had we used the module system of \Coq to
parameterize our development instead of the section mechanism, we would
have had this instantiation at no cost. However the lack of a powerful module
system in \Haskell prevented us from doing that, as we would not be able to
extract to it then.

Unfortunately, extraction of our dependent sequences will not give
excellent results because the measure contains a function giving the map 
which will be extracted even though it has no algorithmical role.
We are in a case where only part of an index need be stored (unlike 
the setting of \citet{DBLP:conf/types/BradyMM03}), so a
finer distinction than $\Prop$/$\Type$ has to be found to distinguish
between algorithmical and non-algorithmical content. 
Current work by B. Barras and B. Bernardo on an adaptation of 
the Calculus of Implicit Constructions \cite{DBLP:conf/tlca/Miquel01} as the 
core calculus of a dependently-typed language ought to give the expressivity we
seek. 

We conclude that \Russell is not overly verbose and that doing the
proofs is actually not a insurmountable task,
knowing that most of them were solved automatically using a normalization
tactic for monoid expressions.

\section{Discussion}
\label{sec:Discussion}
\subsection{Proof-Carrying Code}
\label{sec:PI}
As displayed before, \Coq ultimately melts proof and code in its terms,
so as to be able to check the correctness of any part of a term at any
time. On the surface, we can manage to separate the two activities of
coding and proving using \Program, but there are parts of
the system where it still bites. The same notable problem appear during
proofs and computations. When proving, we are faced with terms containing
proofs we do not want to name or manipulate and when computing (in the
call-by-value case only) the system itself is stuck with irreducible opaque proofs.
However, in the two cases the proofs appear only in parts of the term that
should not be reduced because they have no algorithmical content and
need not be manipulated as their structure has no importance. This
is formalized by proof-irrelevance (PI), the statement that two proofs of the
same proposition are equal. The meta theory of the Calculus of Constructions
extended with PI has been studied extensively by Werner and
Miquel. Recently, \citet{Werner:ProofIrrelevance} also
gave a method to implement it effectively in \Coq, which is currently pursued
by the author. It has also been studied in the context of Observational
Type Theory, the (would be) core calculus of \Epigram 2.
Having PI in the system would make these two problems disappear
and would probably improve performance of computations inside \Coq too.

\subsection{A note on the implementation}
\Program is composed of two distinct parts: the type-checker and the
proof-obligation handling machinery.
In fact only a small part of the \Coq type-checker needed to be changed, 
so \Russell's type-checker is just a different instantiation of
the typing functor of \Coq which is parameterized by the conversion
algorithm. This is to contrast with the work by Catherine Parent on
the previous \Program tactic which was a completely different solution
for program-synthesis in \Coq. The system was %\cite{conf/mpc/Parent95}
not integrated with \Coq and so went unmaintained.

\subsection{Related Work}
\Russell can be seen as one point in the design space of
programming languages with dependent types. Its most distinctive feature is
the separation of code from proof, \`a la \PVS, and its treatment of
subsets and indexed data types. It is also original in the sense that it
\emph{elaborates} into a well-studied, safe and mature language while
most other solutions coming from the programming languages community
have no such safety net or only for part of the system. 
\Russell could in fact be implemented in \Epigram or \Agda
rather easily as they are based on roughly the same foundations as \Coq. The
main difference is that \Coq already has a comprehensive standard
library and a tactic system. Compared to programming languages like
\ATS, \Concoqtion \cite{DBLP:conf/pepm/FogartyPST07}
or \Haskell, \Russell has no effects and only well-founded recursion.
We found that in practice the programs we write
usually have relatively simple termination arguments 
and real non-termination could be accommodated by using a coinductive encoding. 
We think having a layered approach to treating computational effects
using functional encodings like monads is a more disciplined approach
to development and certification than trying to design a system with
some computational effects built-in, if this is at all possible while keeping
strong properties.

Other certifications of tree-based data structures include a certification
of the AVL trees used in \Ocaml and also red-black trees in \Coq
\cite{DBLP:conf/esop/FilliatreL04}.
The development was done by first building a dependent interface and
providing a simpler one on top of it. The second author reported great
success adapting the dependently-typed implementation which was
originaly done using tactics to \Program.

\section{Conclusion}
What we would like to assess is that writing programs in \Russell is not
more difficult than in \Haskell, with the added benefit of a complete environment
for proving properties about your programs in a highly expressive
specification language. Clearly, there is room for
improvement on the side of programming language technology, for example
some kind of overloading mechanism would allow more conciseness, but
\Russell is already very powerful in terms of specification possibilities and
actual support for proving properties, as it (re-)lies on an already mature
proof assistant. One thing we learned is that once we begin to use dependent types,
embracing them completely become a sensible and desirable goal, carrying
the strong properties bottom-up. \Program allows to do this in a
tractable and hopefully maintainable way. 

\paragraph{Acknowledgments}
Thanks to Christine Paulin-Mohring, Jean-Christophe Filli\^atre and the
anonymous referees for their insightful comments on drafts
of this paper, along with Adam Chlipala and my colleagues of the
\Demons team for their lasting enthusisasm and criticism. 
Finally, I thank Yann R\'egis-Gianas for introducing me to Finger Trees.

\newpage
\bibliography{abbrevs,adamc,epigram,crossrefs,crossrefs2,demons,demons2,demons3,pvs,my,base,letouzey,systems,bcp,biblio-subset,dblp}
\bibliographystyle{myplainnat}
\end{document}
\appendix
\section*{Index}
All functions are parameterized by a type $A$.

\begin{figure}[h]
\begin{tabular}{ll}
Function name & Reference or Specification \\
\hline
\id{full} & \S\ref{def:full} \\
\id{single} & \coqdocind{digit} \coqdocid{A} \ensuremath{\rightarrow} \kw{Prop} \\
\id{add\_digit\_left} & \S\ref{def:adddigitleft} \\

% \id{add\_digit\_right} & (\coqdocid{A} : \coqdockw{Type}) 
% (\coqdocid{d} : \coqdocind{digit} \coqdocid{A} \coqor
% \ensuremath{\lnot} \coqdocid{full} \coqdocid{d}) (\coqdocid{a} :
% \coqdocid{A}) : \coqdocind{digit} \coqdocid{A} \\

\coqdocid{digit\_head} & \coqdocind{digit} \coqdocid{A} $"->"$
\coqdocid{A} \\

\coqdocid{digit\_tail} &
(\coqdocid{d} : \coqdocind{digit}
\coqdocid{A} \coqor  \ensuremath{\lnot} \coqdocid{single}
\coqdocid{d}) $"->"$ \coqdocind{digit} \coqdocid{A} \\

\coqdocid{digit\_measure} &
(\coqdocid{A} \ensuremath{\rightarrow}
\coqdocid{v}) $"->"$ \coqdocind{digit} \coqdocid{A} $"->"$ \coqdocid{v} \\

\coqdocid{option\_digit\_measure} &
(\coqdocid{A} \ensuremath{\rightarrow} \coqdocid{v}) $"->"$ 
\ind{option} (\coqdocind{digit} \coqdocid{A}) $"->"$ \coqdocid{v} \\

\id{add\_left} & \S\ref{def:addleft} \\
\id{add\_right} & (\coqdocid{m} : \coqdocid{A} \ensuremath{\rightarrow} \coqdocid{v})
(\id{s} : \id{v}) (\coqdocid{t} : \coqdocind{fingertree} \id{m} \id{s}) \\
& (\id{x} : \id{A}), \coqdocind{fingertree} \coqdocid{m} (\id{s} $\cdot$ \coqdocid{m} \coqdocid{x}) \\
 
\id{digit\_to\_tree} & (\coqdocid{m} : \coqdocid{A} \ensuremath{\rightarrow} \coqdocid{v})
(\coqdocid{d} : \coqdocind{digit} \coqdocid{A}) \ensuremath{\rightarrow} \\
& \coqdocind{fingertree} \coqdocid{m} (\coqdocid{digit\_measure}
\coqdocid{m} \coqdocid{d}) \\

\id{option\_digit\_to\_tree} & (\coqdocid{m} : \coqdocid{A} \ensuremath{\rightarrow} \coqdocid{v})
(\coqdocid{d} : \ind{option} (\coqdocind{digit} \coqdocid{A})) \ensuremath{\rightarrow} \\
& \coqdocind{fingertree} \coqdocid{m} (\coqdocid{digit\_measure}
\coqdocid{m} \coqdocid{d}) \\

\coqdocid{tree\_size} &
(\coqdocid{m} : \coqdocid{A} \ensuremath{\rightarrow}
\coqdocid{v}) (\coqdocid{s} : \coqdocid{v}), \\
& \coqdocind{fingertree} \coqdocid{m} \coqdocid{s} \ensuremath{\rightarrow} \coqdocind{nat} \\

\coqdocid{View\_L\_size} & (\coqdocid{m} :
\coqdocid{A} \ensuremath{\rightarrow} \coqdocid{v}),\\
& \coqdocind{View\_L} \coqdocid{A} (\coqdocind{fingertree}
\coqdocid{m})) \ensuremath{\rightarrow} \coqdocind{nat} \\

\id{view\_L} & \S\ref{def:viewL} \\
\id{deep\_L} & \S\ref{def:deepL} \\

\id{isEmpty, isEmpty\_dec} & \S\ref{sec:Back_To_Normal} \\
\id{head}, \id{last}, \id{tail}, \id{liat} & \S\ref{sec:Back_To_Normal} \\

\id{split\_digit}, \id{split\_node} & \S\ref{def:splitnode} \\
\id{app}, \id{split\_tree} & \S\ref{def:split_tree} \\

\id{cat} & \coqdocind{FingerTree} \id{A} \ensuremath{\times}
\coqdocind{FingerTree} \id{A} $"->"$ \\
& \coqdocind{FingerTree} \id{A} \\
\id{cons} & \id{A} \ensuremath{\rightarrow} \coqdocind{FingerTree} \id{A} \ensuremath{\rightarrow}
\coqdocind{FingerTree} \id{A} \\
\coqdocid{split\_with} & \S\ref{def:split_with} \\

\coqdocid{empty} & \coqdocind{seq} \coqdocid{A} \coqdocid{epsilon} \\
\id{make}, \id{get}, \id{set}, \id{split} & \S\ref{def:split}

\end{tabular}
\end{figure}
\end{document}
%%% Local Variables: 
%%% mode: latex
%%% TeX-master: t
%%% TeX-PDF-mode: t
%%% LaTeX-command: "x=pdf; TEXINPUTS=\"~/research/publication/styles:../doc:\" ${pdfx}latex"
%%% TeX-view-style: (("." "~/bin/openpdf %s.pdf"))
%%% End: 

