diff --git a/WuV/WuV_notes.pdf b/WuV/WuV_notes.pdf index 767ef5b..a6f1ae0 100644 Binary files a/WuV/WuV_notes.pdf and b/WuV/WuV_notes.pdf differ diff --git a/WuV/WuV_slides.pdf b/WuV/WuV_slides.pdf index 28e3cfc..faa6bff 100644 Binary files a/WuV/WuV_slides.pdf and b/WuV/WuV_slides.pdf differ diff --git a/WuV/WuV_slides.tex b/WuV/WuV_slides.tex index a1400a0..8b99b2c 100644 --- a/WuV/WuV_slides.tex +++ b/WuV/WuV_slides.tex @@ -6,6 +6,7 @@ \usepackage{amsmath} \usepackage{amssymb} \usepackage{xkeyval} +\usepackage{tikz-cd} \usepackage{../macros/mytikz} \usepackage{listings} diff --git a/WuV/WuV_slides_body.tex b/WuV/WuV_slides_body.tex index bab8328..9b1aa56 100644 --- a/WuV/WuV_slides_body.tex +++ b/WuV/WuV_slides_body.tex @@ -1218,22 +1218,21 @@ \section{Concrete Knowledge and Typed Ontologies} \end{frame} \begin{frame}\frametitle{Identifiers} -\begin{blockitems}{Curry-Typed Knowledge graph} +\begin{blockitems}{Curry-Typed Knowledge Graph} \item concept, relation, property names given in TBox \item individual names attached to nodes \end{blockitems} \begin{blockitems}{Church-Typed Database} \item table, column names given in schema -\item row identified by distinguished column (= key) \\ -options +\item row identified by distinguished key\\[0.4em] +options: \begin{itemize} - \item preexistent characteristic column + \item preexistent characteristic column or column set \item added upon insertion \begin{itemize} \item UUID string \item incremental integers - \item concatenation of characteristic list of columns \end{itemize} \end{itemize} \item column/row identifiers formed by qualifying with table name @@ -1271,7 +1270,7 @@ \section{Concrete Knowledge and Typed Ontologies} \item field types must be base types, $a:A$ not allowed \item allow $ID$ as additional base type \item use field $a:ID$ in table $B$ -\item store value of $b$ in table $A$ +\item store value of $a$ in table $A$ \end{blockitems} \begin{blockitems}{Inheritance: $B$ inherits from $A$} @@ -1401,7 +1400,9 @@ \section{Motivation} \begin{itemize} \item essentially the same in every language \lec{including whatever language used for semantics} -\item semantics by translation trivial +\item semantics by translation straight-forward +\glec{watch out for different endiannesses for ints,} +\glec{charsets and encodings for characters/strings} \end{itemize} \end{frame} @@ -1555,7 +1556,8 @@ \section{Data Types} \item IEEE float, double \item Booleans \item Unicode characters - \item strings \glec{could be list of characters but usually bad idea} + \item strings\\ + \glec{could be list of characters but usually bad idea} \end{blockitems} \begin{blockitems}{typical in math} @@ -1606,7 +1608,7 @@ \section{Data Types} \end{itemize} \end{itemize} \lec{dependent types much more complicated, less uniformly used} -\lec{harder to starndardize} +\lec{harder to standardize} \end{frame} \begin{frame}\frametitle{Collection Data Types} @@ -1727,14 +1729,14 @@ \section{Data Representation Languages} \begin{blockitems}{Candidates} \item XML: standard on the web, notoriously verbose - \item JSON: JavaScript objects, more human-friendly text syntax - \lec{older than XML, probably better choice than XML in retrospect} + \item JSON: minimal, more human-friendly text syntax + \lec{newer than XML, very dominant in JS ecosystem} \item YAML: line/indentation-based \end{blockitems} \end{frame} \begin{frame}{Breakout Question} -What is the difference between JSON, YAML, XML? +What is the difference between XML, JSON, and YAML? \end{frame} \begin{frame}\frametitle{Typical Data Representation Languages} @@ -1754,49 +1756,48 @@ \section{Data Representation Languages} \end{blockitems} \end{frame} -\begin{frame}\frametitle{Example: JSON} +\begin{frame}[fragile]\frametitle{Example: JSON} JSON: -\[\mathll{ - \{\\ - \tb "individual": "Florian Rabe",\\ - \tb "age": 40,\\ - \tb "concepts": ["instructor", "male"],\\ - \tb "teach": [\\ - \tb\tb\{"name":"Wuv",credits:7.5\},\\ - \tb\tb\{"name":"KRMT",credits:5\}\\ - \tb]\\ -\} -}\] - +\begin{lstlisting}[basicstyle=\footnotesize] +{ + "individual": "FlorianRabe", + "age": 40, + "concepts": ["instructor", "male"], + "teach": [ + {"name": "WuV", credits: 7.5}, + {"name": "KRMT", credits: 5} + ] +} +\end{lstlisting} Weirdnesses: \begin{itemize} -\item atomic/list/record = basic/array/object +\item atomic/list/record called basic/array/object \item record field names are arbitrary strings, must be quoted \item records use $:$ instead of $=$ \end{itemize} \end{frame} -\begin{frame}\frametitle{Example: YAML} +\begin{frame}[fragile]\frametitle{Example: YAML} inline syntax: same as JSON but without quoted field names alternative: indentation-sensitive syntax -\[\mathll{ - individual: "Florian Rabe"\\ - age: 40\\ - concepts:\\ - \tb -\; "instructor"\\ - \tb -\; "male"\\ - teach:\\ - \tb -\; name: "WuV"\\ - \tb\phantom{-}\; credits: 7.5\\ - \tb -\; name: "KRMT"\ - \tb\phantom{-}\; credits: 5\\ -}\] +\begin{lstlisting}[basicstyle=\footnotesize] +individual: "Florian Rabe" +age: 40 +concepts: + - instructor + - male +teach: + - name: WuV + credits: 7.5 + - name: KRMT + credits: 5 +\end{lstlisting} Weirdnesses: \begin{itemize} -\item atomic/list/record = scalar/collection/structure +\item atomic/list/record called scalar/collection/structure \item records use $:$ instead of $=$ \end{itemize} \end{frame} @@ -1809,22 +1810,22 @@ \section{Data Representation Languages} \end{itemize} \begin{lstlisting}[basicstyle=\footnotesize] - - - instructor - male - - - - - - + + + instructor + male + + + + + + \end{lstlisting} \begin{itemize} -\item Good: \lstinline|Person|, \lstinline|Course|, \lstinline|Concept| give type of object +\item Good: \lstinline||, \lstinline||, \lstinline|| give type of object \glec{easier to decode} -\item Bad: value of record field must be string +\item Bad: values of record fields must be string \glec{concepts cannot be given in attribute} \glec{integers, Booleans, whitespace-separated lists coded as strings} \end{itemize} @@ -1834,7 +1835,7 @@ \section{Data Representation Languages} \begin{blockitems}{Problem} \item Large objects are often redundant \glec{specially when machine-produced} -\item Same string, URL, mathematical objects occurs in multiple places +\item Same string, URL, mathematical object occurs in multiple places \item Handled in memory via pointers \item Size of serialization can explode \end{blockitems} @@ -1866,7 +1867,7 @@ \section{Data Representation Languages} \item Problem: only works if \begin{itemize} \item for immutable data structures - \item if no occurrence-specific metadata \glec{e.g., source reference} + \item if no occurrence-specific metadata \glec{e.g., source references} \end{itemize} \end{blockitems} @@ -1927,7 +1928,7 @@ \section{Codecs} Easy cases: \begin{itemize} \item StandardFloat: as specified in IEEE floating point standard -\item StandardString: as themselves, quoted +\item StandardString: as themselves \item StandardBool: as $true$ or $false$ \item StandardInt (64-bit): decimal digit-sequences as usual \end{itemize} @@ -1967,18 +1968,19 @@ \section{Codecs} \begin{frame}\frametitle{Codecs for Lists} Encode list $x$ of elements of type $T$ \begin{itemize} -\item $L$ is strings: e.g., comma-separated list of $T$-encoded elements of $x$ +\item $L$ is strings: e.g., comma-separated list of $T$-encoded elements of $x$\\ +\glec{(un)escape comma in T-encoded elements} \item $L$ is JSON: \begin{itemize} \item ListAsString: like for strings above - \item ListAsArray: lists JSON array of $T$-encoded elements of $x$ + \item ListAsArray: JSON array of $T$-encoded elements of $x$ \end{itemize} \end{itemize} \end{frame} \begin{frame}\frametitle{Additional Types} -Examples: semester +Example: semester Extend BDL: \begin{commgrammar} @@ -1995,7 +1997,7 @@ \section{Codecs} \end{frame} \begin{frame}\frametitle{Additional Types (2)} -Examples: timestamps +Example: timestamps Extend BDL: \begin{commgrammar} @@ -2103,19 +2105,14 @@ \section{Data Interchange} Ontology based on BDL-ADTs with additional codec information: \begin{lstlisting}[basicstyle=\footnotesize] schema Instructor -<<<<<<< Updated upstream - name: string codec StandardString - age: int codec StandardInt - courses: list Course codec CommaSeparatedList CourseAsName -======= - name: string codec StandardString - age: int codec StandardInt - courses: list Course codec CommaSeparatedList CourseAsName ->>>>>>> Stashed changes + name: string codec StandardString + age: int codec StandardInt + courses: list Course codec CommaSepList CourseAsName + schema Course - name: string codec StandardString - credits: float codec StandardFloat - semester: Semester codec SemesterAsString + name: string codec StandardString + credits: float codec StandardFloat + semester: Semester codec SemesterAsString \end{lstlisting} \medskip @@ -2130,7 +2127,7 @@ \section{Data Interchange} \begin{frame}\frametitle{Open Problem: Non-Compositionality} \begin{blockitems}{Sometimes optimal translation is non-compositional} -\item example translate $list$-type in ADT to comma-separated string in DB +\item example translates $list$-type in ADT to comma-separated string in DB \item better break up $list\,B$ fields in type $A$ into separate table with columns for $A$ and $B$ \end{blockitems} @@ -2143,24 +2140,27 @@ \section{Data Interchange} \begin{frame}\frametitle{Open Problem: Querying} \begin{itemize} -\item General setup +\item General setup\glec{$L = \text{SQL}$} \begin{itemize} - \item write SQL-style queries using at the BDL level + \item write SQL-style queries using BDL \item automatically encode values when writing to database from PL \item automatically decode query results when reading from DB \end{itemize} \item But queries using semantic operations cannot always be translated to DB \begin{itemize} \item operation $IsSummer: Semester \to bool$ in BDL - \item query \lstinline|SELECT * FROM course WHERE $IsSummer$(semester)| + \item query \lstinline|SELECT * FROM course WHERE $\ IsSummer$(semester)| \item how to map $IsSummer$ to SQL? \end{itemize} \item Ontology operations need commuting operations on codes \begin{itemize} -\item given $f: A\to B$ in BDL, codecs $C,D$ for $A$ and $B$ -\item SQL function $f'$ commutes with $f$ iff \\ - \[B.decode (f'(C.encode\,a)) = f(a)\] - for all $a:A$ +\item given $f: T_1\to T_2$ in BDL and respective codecs $C_{1,2}$, +\item $L$-function $f'$ commutes with $f$ iff \[ + \begin{tikzcd}[ampersand replacement=\&] + T_1 \arrow[d, "C_1.encode"'] \arrow[r, "f"] \& T_2\\ + L \arrow[r, "f'"] \& L \arrow[u, "C_2.decode"'] + \end{tikzcd} +\] \end{itemize} \end{itemize} \end{frame} @@ -2472,7 +2472,7 @@ \section{Concretized Queries} \item BOL ontology: \[\mathll{concept\, male,\; concept\, person,\;axiom\, male \sqsubseteq person, \\ individual\, FlorianRabe,\;assertion\, FlorianRabe\, isa\, male}\] -\item Query $x:individual\vdash_{BOL}x \text{ isa } person$ +\item Query $x:individual\vdash_{BOL}x \text{ is-a } person$ \item Translation to SFOL: $x:\iota\vdash_{SFOL} person(x)$ \item SFOL calculus yields theorem $\vdash_{SFOL}person(FlorianRabe)$ \item Query result $\sem{\gamma}= x:= FlorianRabe$ @@ -2790,7 +2790,7 @@ \section{Syntactic Querying} \textbf{Indexer} consists of \begin{itemize} \item data structure $O$ for indexable objects - \lec{specific to aspect, index design} + \lec{aspect-specific index design} \glec{e.g., words, syntax trees} \item function that maps library to index \glec{the indexing} \end{itemize} @@ -3773,7 +3773,7 @@ \section{Absolute Semantics for BOL} \[\rul{\vdash c\sqsubseteq d \tb \vdash d\sqsubseteq c}{\vdash c\Equiv d}\] Equal concepts can be substituted for each other: -\[\rul{\vdash c\Equiv d\tb x:C\vdash f(x):\prop \tb \vdash f(c)}{\vdash f(d)}\] +\[\rul{\vdash c\Equiv d\tb x:C\vdash f:\prop\tb \vdash f[x := c]}{\vdash f[x := d]}\] \glec{This completely defines equality.} \end{frame} @@ -3810,7 +3810,7 @@ \section{Absolute Semantics for BOL} \begin{itemize} \item<2-> Open world: no \item<3-> Closed world: yes - \[\rul{\Gamma[x=i]\vdash f[x=i] \;\text{ for every individual } i}{\Gamma, x:I\vdash f(x)}\] + \[\rul{\Gamma,x:I \vdash f:\prop\tb\Gamma[x:=i]\vdash f[x:=i] \;\text{ for every ind. } i}{\Gamma, x:I\vdash f}\] \glec{effectively applicable if only finitely many individuals} \end{itemize} \end{frame} diff --git a/WuV/bolsem.tex b/WuV/bolsem.tex index e11df03..c586988 100644 --- a/WuV/bolsem.tex +++ b/WuV/bolsem.tex @@ -144,6 +144,7 @@ \subsection{Semantics} \begin{compactitem} \item A binary predicate symbol $C_R\sq i\times i$. Note that $R$ may be a complex expression; so we have to generate a fresh name $C_R$ here. \item The axiom $\forall x:\iota,y:\iota.\,R(x,y)\impl C_R(x,y)$, i.e., $C_R$ extends $R$. + \item The axiom $\forall x:\iota.\,C_R(x,x)$, i.e., $C_R$ is reflexive. \item The axiom $\forall x:\iota,y:\iota,z:\iota.\,C_R(x,y)\wedge C_R(y,z)\impl C_R(x,z)$, i.e., $C_R$ is transitive. \end{compactitem} \item We add the case $\sem{R^*}=C_R(x,y)$ to the interpretation function. @@ -199,8 +200,8 @@ \subsection{Semantics} $F$ & consistency check, consequence closure (omitted)\\ \hline Formula & Query that returns empty result iff formula is true \\ -$C_1 \Equiv C_2$ & $(\sem{C_1}\sm\sem{C_2})$ UNION $(\sem{C_2}\sm\sem{C_1})$\\ -$C_1 \sqsubseteq C_2$ & $\sem{C_1}\sm\sem{C_2}$\\ +$C_1 \Equiv C_2$ & $(\sem{C_1}\text{ EXCEPT }\sem{C_2})$ UNION $(\sem{C_2}\text{ EXCEPT }\sem{C_1})$\\ +$C_1 \sqsubseteq C_2$ & $\sem{C_1}$ EXCEPT $\sem{C_2}$\\ $I\; \texttt{is-a}\; C$ & $\sem{I}$ IN $C$\\ $I_1\; R\; I_2$ & ($\sem{I_1}$, $\sem{I_2}$) IN $R$ \\ $I\; P\; V$ & ($\sem{I}$, $V$) IN $P$ \\ @@ -212,8 +213,8 @@ \subsection{Semantics} $i$ & SELECT * FROM $i$\\ $C_1 \sqcup C_2$ & $\sem{C_1}$ UNION $\sem{C_2}$\\ $C_1 \sqcap C_2$ & $\sem{C_1}$ INTERSECT $\sem{C_2}$\\ -$\forall R.C$ & individuals $\sm$ (SELECT subject FROM $\sem{R}$ WHERE object NOT IN $\sem{C})$ \\ -$\exists R.C$ & SELECT DISTINCT subject FROM $\sem{R}$, $\sem{C}$ WHERE object=id\\ +$\forall R.C$ & individuals EXCEPT (SELECT subject FROM $\sem{R}$ WHERE object NOT IN $\sem{C}$) \\ +$\exists R.C$ & SELECT DISTINCT subject FROM $\sem{R}$ WHERE object IN $\sem{C}$\\ $\dom\, R$ & SELECT DISTINCT subject FROM $\sem{R}$\\ $\rng\, R$ & SELECT DISTINCT object FROM $\sem{R}$\\ $\dom\, P$ & SELECT DISTINCT subject FROM $\sem{P}$\\ @@ -222,8 +223,8 @@ \subsection{Semantics} $i$ & SELECT * FROM $i$\\ $R_1 \cup R_2$ & $\sem{R_1}$ UNION $\sem{R_2}$\\ $R_1 \cap R_2$ & $\sem{R_1}$ INTERSECT $\sem{R_2}$\\ -$R_1 ; R_2$ & SELECT DISTINCT l.subject, r.object FROM $\sem{R_1}$ AS l, $\sem{R_2}$ AS r \\ - & \tb\tb WHERE l.object = r.subject\\ +$R_1 ; R_2$ & SELECT DISTINCT l.subject, r.object FROM $\sem{R_1}$ AS l\\ + & \tb\tb JOIN $\sem{R_2}$ AS r ON l.object = r.subject\\ $R^{-1}$ & SELECT object AS subject, subject AS object FROM $\sem{R}$\\ $R^*$ & (tricky, omitted)\\ $\Delta_C$ & SELECT id AS subject, id AS object FROM $\sem{C}$\\ @@ -231,14 +232,11 @@ \subsection{Semantics} Property of type $T$ & SQL query for two-column table\\ $i$ & SELECT * FROM $i$\\ \end{tabular} -\medskip - -Using the abbreviation: $S\sm T$ = SELECT id FROM $S$ WHERE id NOT IN $T$\\ \caption{Interpretation Function for BOL into SQL}\label{fig:bolsem:sql} \end{figure} \begin{remark}[Limitations] -Our interpretation of BOL in SQL is restricted to assertions using only atomic expressions. +Our interpretation of BOL to SQL is restricted to assertions that use atomic expressions only. For example, in the case for $I$ \texttt{is-a} $C$, we assume that $I$ and $C$ are names. Thus, we have already created an individual for $I$ and a table for $C$, and we can thus insert the former into the latter. The general case would be more complicated but is much less important in practice. @@ -279,16 +277,16 @@ \subsection{Semantics} Moreover, the axioms result in the following consistency checks, i.e., queries that should be empty: \begin{lstlisting} -SELECT * FROM male $\sm$ SELECT * FROM person -SELECT * FROM instructor $\sm$ SELECT * FROM person -SELECT * FROM (SELECT DISTINCT subject FROM teach) $\sm$ SELECT * FROM instructor -SELECT * FROM (SELECT DISTINCT object FROM teach) $\sm$ SELECT * FROM course -(SELECT * FROM (SELECT DISTINCT subject FROM creditValue) $\sm$ SELECT * FROM course) - UNION (SELECT * FROM course $\sm$ SELECT DISTINCT subject FROM creditValue) -SELECT * FROM course $\sm$ +(SELECT * FROM male) EXCEPT (SELECT * FROM person) +(SELECT * FROM instructor) EXCEPT (SELECT * FROM person) +SELECT * FROM (SELECT DISTINCT subject FROM teach) EXCEPT (SELECT * FROM instructor) +SELECT * FROM (SELECT DISTINCT object FROM teach) EXCEPT (SELECT * FROM course) +((SELECT DISTINCT subject FROM creditValue) EXCEPT (SELECT * FROM course)) + UNION ((SELECT * FROM course) EXCEPT (SELECT DISTINCT subject FROM creditValue)) +(SELECT * FROM course) EXCEPT (SELECT DISTINCT subject - FROM (SELECT object AS subject, subject AS object FROM teach), instructor - WHERE object=id) + FROM (SELECT object AS subject, subject AS object FROM teach) + WHERE object IN (SELECT * FROM instructor)) \end{lstlisting} Some of these checks will only succeed after performing the consequence closure. In particular, the table \verb|person| misses the entry $1$ for the individual $\FR$ because the assertion {\FR\;\texttt{is-a}\;\person} is only present as a consequence @@ -314,6 +312,7 @@ \subsection{Semantics} \item classes for all BOL-base types and values for them (if not already present in Scala) \item classes for individuals and hash sets of objects: \begin{lstlisting} +import scala.collection.immutable.Set import scala.collection.mutable.HashSet val individuals = new HashSet[String] \end{lstlisting} @@ -336,16 +335,15 @@ \subsection{Semantics} \kw{individual}\,$i$ & val $i$ = "$i$"; individuals += $i$ \\ \kw{concept}\,$i$ & val $i$ = new HashSet[String]\\ \kw{relation}\,$i$ & val $i$ = new HashSet[(String,String)] \\ -\kw{property}\,$i:T$ & val $i$ = new HashSet[(String,T)] \\ +\kw{property}\,$i:T$ & val $i$ = new HashSet[(String,$\sem{T}$)] \\ $I\; \texttt{is-a}\; C$ & $\sem{C}$ += $\sem{I}$\\ $I_1\; R\; I_2$ & $\sem{R}$ += $(\sem{I_1},\sem{I_2})$\\ $I\; P\; V$ & $\sem{P}$ += $(\sem{I},\sem{V})$\\ $F$ & assertions, consequence closure (omitted)\\ \hline Formula & Program that evaluates the formula to a Boolean \\ -$C_1 \Equiv C_2$ & \{val $c1$ = $\sem{C_1}$; val $c2$ = $\sem{C_2}$; \\ - & \tb $c_1$.forall(x $\rA$ $c_2$.contains(x)) \&\& $c_2$.forall(x $\rA$ $c_1$.contains(x))\}\\ -$C_1 \sqsubseteq C_2$ & \{val $c1$ = $\sem{C_1}$; val $c2$ = $\sem{C_2}$; $c_1$.forall(x $\rA$ $c_2$.contains(x))\}\\ +$C_1 \Equiv C_2$ & $\sem{C_1}$ == $\sem{C_2}$\\ +$C_1 \sqsubseteq C_2$ & $\sem{C_1}$.subsetOf($\sem{C_2}$)\\ $I\; \texttt{is-a}\; C$ & $\sem{C}$.contains($\sem{I}$)\\ $I_1\; R\; I_2$ & $\sem{R}$.contains($(\sem{I_1},\sem{I_2})$)\\ $I\; P\; V$ & $\sem{P}$.contains($(\sem{I},\sem{V})$)\\ @@ -353,7 +351,7 @@ \subsection{Semantics} Individual & String object\\ $i$ & $i$ \\ \hline -Concept & HashSet[String] object\\ +Concept & Set[String] object\\ $i$ & $i$\\ $C_1 \sqcup C_2$ & $\sem{C_1}$.union($\sem{C_2}$)\\ $C_1 \sqcap C_2$ & $\sem{C_1}$.inter($\sem{C_2}$)\\ @@ -361,19 +359,19 @@ \subsection{Semantics} & \tb r.foreach(x $\rA$ if (!c.contains(x.\_2)) e -= x.\_1); e\} \\ $\exists R.C$ & \{val c = $\sem{C}$; val r = $\sem{R}$; val e = new HashSet[String]; \\ & \tb r.foreach(x$ \rA$ if (c.contains(x.\_2)) e += x.\_1); e\}\\ -$\dom\, R$ & \{val c = new HashSet[String]; $\sem{R}$.foreach(x $\rA$ c += x.\_1); c\}\\ -$\rng\, R$ & \{val c = new HashSet[String]; $\sem{R}$.foreach(x $\rA$ c += x.\_2); c\}\\ -$\dom\, P$ & \{val c = new HashSet[String]; $\sem{P}$.foreach(x $\rA$ c += x.\_1); c\}\\ +$\dom\, R$ & $\sem{R}$.map(\_.\_1).toSet\\ +$\rng\, R$ & $\sem{R}$.map(\_.\_2).toSet\\ +$\dom\, P$ & $\sem{P}$.map(\_.\_1).toSet\\ \hline -Relation & HashSet[(String,String)] object\\ +Relation & Set[(String,String)] object\\ $i$ & $i$\\ $R_1 \cup R_2$ & $\sem{R_1}$.union($\sem{R_2}$)\\ $R_1 \cap R_2$ & $\sem{R_1}$.inter($\sem{R_2}$)\\ $R_1 ; R_2$ & \{val r1 = $\sem{R_1}$; val r2 = $\sem{R_2}$; val e = new HashSet[(String,String)]; \\ & \tb r1.foreach(x $\rA$ r2.foreach(y $\rA$ if (x.\_2 == y.\_1) e += (x.\_1,y.\_2))); e\}\\ -$R^{-1}$ & \{val r = new HashSet[(String,String)]; $\sem{R}$.foreach(x $\rA$ r += (x.\_2,x.\_1)); r\}\\ -$R^*$ & (omitted)\\ -$\Delta_C$ & \{val r = new HashSet[(String,String)]; $\sem{C}$.foreach(x $\rA$ r += (x,x)); r\}\\ +$R^{-1}$ & $\sem{R}$.map(x $\rA$ (x.\_2, x.\_1)).toSet\\ +$R^*$ & (omitted)\\ +$\Delta_C$ & $\sem{C}$.map(x $\rA$ (x, x)).toSet\\ \hline Property of type $T$ & HashSet[(String,T)] object\\ $i$ & $i$\\ @@ -387,7 +385,8 @@ \subsection{Semantics} $(A,B)$ is the product type $A\times B$ with pairing operator $(x,y)$ and projection functions $\_1$ and $\_2$. $x\rA F(x)$ is $\lambda x.F(x)$. -The class HashSet is part of the standard library and offers function += and -= to add/remove elements, contains to test elementhood, and forall, foreach to quantifiy/iterate over elements. +The class \texttt{Set} is part of the standard library and offers the binary functions inter, union for intersections and unions, subsetOf for subset tests, and functions forall, foreach and map to quantify/iterate over elements. +The class \texttt{HashSet} (from the \texttt{scala.collection.mutable} namespace) provides the same functionality and additionally functions += and -= for adding/removing elements. Types of variables are inferred if omitted. \end{remark} @@ -424,26 +423,18 @@ \subsection{Semantics} Note that we could easily compute the hash set \verb|instructor.diff(male)| and add to it. But that would not add anything to the two constituent sets. -If we thing of the axioms as consistency checks, we can translate them to assertions, i.e., Boolean expressions that must be true. +If we think of the axioms as consistency checks, we can translate them to assertions, i.e., Boolean expressions that must be true. We only give some examples: \begin{lstlisting} -{val c1 = male; val c2 = person; c1.forall(x $\rA$ c2.contains(x))} - -{ - val c1 = course; - val c2 = { - val c = instructor; - val r = { - val r = new HashSet[(String,String)]; - teach.foreach(x $\rA$ r += (x._2,x._1)); - r - } - val e = new HashSet[String]; - r.foreach(x $\rA$ if (c.contains(x._2)) e += x._1); - e - }; - c1.forall(x $\rA$ c2.contains(x)) -} +male.subsetOf(person) + +course.subsetOf({ + val c = instructor; + val r = teach.map(x $\rA$ (x._2, x._1)).toSet; + val e = new HashSet[String]; + r.foreach(x $\rA$ if (c.contains(x._2)) e += x._1); + e +}); \end{lstlisting} %instructor $\sqsubseteq$ person @@ -600,7 +591,7 @@ \section{Narrative Semantics}\label{sec:bolsem:narr} \item We need a lexicon to obtain inflection information and the translation tries to remedy that by appending ``s'' in various places. This works in some cases but not in others. %\item We have introduced some pattern matching and conditional rules in \Cref{fig:bolsem:eng} to alleviate the greatest awkwardnesses, but much more would be needed, which would lead to things like ``Peter hass Mary'' or ``Peter has as childs Mary''. -\item there are many linguistic devices that serve an important role in natural language, but which we are not targeting. +\item There are many linguistic devices that serve an important role in natural language, but which we are not targeting. An example is plural objects for aggregation. Say we have $P \mathtt{is-a} C$, $M \mathtt{is-a} C$, this would translate to ``P is a $\sem{C}$, M is a $\sem{C}$'' in BOL-pidgin, whereas in natural English we would aggregate this to ``P and M are $\sem{C}$s''. \end{itemize} @@ -625,4 +616,4 @@ \section{Exercise 2} You have to implement that as well or use a library for it. We recommend not focusing on implementing the syntax and semantics in their entirety. -It is more instructive to save time by choosing a sublanguage of BOL (by omitting some productions) and to use the time to implement a second semantics. \ No newline at end of file +It is more instructive to save time by choosing a sublanguage of BOL (by omitting some productions) and to use the time to implement a second semantics. diff --git a/WuV/onto_general.tex b/WuV/onto_general.tex index 28b5d83..78d8bc7 100644 --- a/WuV/onto_general.tex +++ b/WuV/onto_general.tex @@ -71,6 +71,16 @@ \section{General Principles}\label{sec:onto:principles} This part is commonly called the \textbf{ABox} (A for assertional). \end{compactitem} +Some sentences in following paragraph partially copied from \url{https://en.wikipedia.org/w/index.php?title=Abox&oldid=892872890}. License: \url{https://en.wikipedia.org/w/index.php?title=Special:CiteThisPage&page=Abox&id=892872890&wpFormIdentifier=titleform}. +TBox declarations tend to be more permanent within a knowledge base and are used as a schema. +In contrast, ABox declarations are much more dynamic in nature and tend to represent instance data. +For instance, we can understand relational databases as ontologies made up of table definitions as TBox declarations and table rows as ABox declarations. +There, both kind of declarations are even separately stored for efficiency reasons, and in particular, the concrete way of representing table rows +as bits is determined by the corresponding table definitions. +However, such a hard distinction is not necessarily the case for other ontology languages. +For example, in the ontology language OWL or in general in triplestores, both kind of declarations are stored next to each other and the syntactic representation distinction blurs. +Nonetheless, we can usually distinguish between both declarations by their usage and lifecycle during the development of a concrete ontology. + A separate division into two parts is the following: \begin{compactitem} \item The \textbf{signature} part contains everything that introduces a \textbf{named entity}: individuals, concepts, relations, and properties. @@ -322,13 +332,14 @@ \section{Representing Ontologies as Triples}\label{sec:onto:triple} \paragraph{Problems} Reflection is subtle and can easily lead to inconsistencies. -We can see this in how the approach of RDF(S) special entities breaks the semantics via FOL. +We can see this in how the approach of RDF(S) special entities breaks the implicitly intended FOL semantics. -For example, it treats classes both as concepts (when they occur as the object of a concept assertion) and as individuals (when they occur as subject or object of a "rdfs:subClassOf" relation assertion). +For example, it treats classes, e.g. ``instructor'', both as concepts and as individuals. The treatment as a concept happens when a class occurs as the object of a concept assertion, e.g. \texttt{FlorianRabe rdf:type instructor}. By contrast, in relation assertions, e.g. \texttt{instructor rdfs:subclassOf person}, classes appearing as subject or object of such an assertion are treated as individuals. Similarly, "rdfs:Class" is used both as an individual and as a class. In fact, the standard prescribes that "rdfs:Class" is an instance of itself. -In practice, this is handled pragmatically by using ontologies that make sense. +In theory, such ambivalent treatment of one and the same thing as an individual and a class is problematic in FOL. This is because FOL's individual and predicate symbols can neither be inter- nor intramixed in arbitrary ways, e.g., predicates cannot be applied to other predicates. +In practice however, this is handled pragmatically by using ontologies that make sense. A formal way to disentangle this is to assume that there are two variants of "rdfs:Class", one as an individual and one as a class. The translation must then translate "rdfs:Class" differently depending on how it is used. diff --git a/WuV/typing.tex b/WuV/typing.tex index 276cdc0..f86b3ec 100644 --- a/WuV/typing.tex +++ b/WuV/typing.tex @@ -174,7 +174,7 @@ \subsection{Motivation} \end{lstlisting} It is often desirable to use types to force the presence of such assertions. -We might wish require that every instructor teaches a list of things, and has an office. +We might require that every instructor teaches a list of things and has an office. Moreover, we can use types to specify the objects of the respective assertions: we can specify that only courses are taught and that the office is a string. Rather than the relations with subjects "FlorianRabe" just happening to be around as well, the type system would now force their existence and the type of the object. Forgetting to give such an assertion or giving it with the wrong object could be detected statically (i.e., without applying the semantics) and flagged as a typing error. @@ -193,10 +193,10 @@ \subsection{Motivation} "age" 40 "office" "11.137" \end{lstlisting} -Now the type "instructor" forces the presence of a list of taught courses (The $^*$ is meant to indicate a list.), an integer for the age, and a string for the office. +Now the type "instructor" forces the presence of a list of taught courses (the $^*$ is meant to indicate a list.), an integer for the age, and a string for the office. We can now see that, in fact, every person should have an age, and not just every instructor. -Because every instructor is meant to be a person, we could try to capture this as well to avoid redundancy. +In fact every instructor is meant to be a person, hence we could try to capture this as well to avoid redundancy. Moreover, every male is meant to be a person, too. That leads to the idea of \textbf{modular types}. @@ -338,14 +338,18 @@ \subsection{Rigorous Definition} \item a \textbf{flat} type of the form \[\{c_1:T_1[=t_i],\ldots,c_n:T_n[=t_i]\}\] where the $c_i$ are distinct names, the $T_i$ are types, and the $t_i$ are optional and wherever given must have type $T_i$, or -\item a \textbf{mixin} type of the form $A_1*A_2$ for ADTs $A_i$. +\item a \textbf{mixin} type of the form $A*B$ for ADTs $A$ and $B$. \end{compactitem} We say that a type system has \textbf{internal ADT}s if all ADTs are types (and thus may in particular occur as the $T_i$ in a record type). \end{definition} -The intuition of a mixin $A*B$ is that we merge the fields of $A$ and $B$. -However, this union dependent: if $B$ is flat, its fields may refer to fields introduced in $A$. +The intuition of a mixin $A*B$ is that we \emph{merge} the fields of $A$ and $B$. +Whereas copying the fields of two ADTs side-by-side is easy and already possible with flat ADTs via $\{c_1: A, c_2: B\}$ (assuming we have internal ADTs), merging is a crucial feature that the language (and type system) must support. +For example in Java, occurrences of overridden fields or methods (often annotated with \texttt{override}) all give rise to some Java-specific way of merging. +Defining a useful yet intuitive way of merging is non-trivial and we elaborate on an exemplary way of doing so and its limitations later. + +TODO: However, this union dependent: if $B$ is flat, its fields may refer to fields introduced in $A$. The most important special case of an ADT are classes: @@ -367,6 +371,22 @@ \subsection{Rigorous Definition} } \end{lstlisting} The details can vary, and special care must be taken in programming languages where initialization may have side effects. +Methods of OO-languages arise as special case of fields whose types are the method signatures and whose definitions are the method bodies. +For example, the Scala class given by +\begin{lstlisting} +// vvvvvvvvvv this is Scala's syntax for declaring a constructor +// receiving such an argument and storing it in an +// equinamed field +class NatAdder(val n: nat) { + def addAndReturn(m: nat): nat = { + return n + m + } +} +\end{lstlisting} +could be represented via \[ + \text{NatAdder} = \texttt{Base} \ast \{n: \mathbb{N}, \text{addAndReturn}: \mathbb{N} \rightarrow \mathbb{N} = \lambda m:\mathbb{N}.\ n + m\} +\] +assuming sufficient background knowledge --- e.g., for natural numbers with $\mathbb{N}$ and $+$, function types $\_ \rightarrow \_$, and lambdas $\lambda \_:\_.\ \_$ --- has been declared in some ADT $\text{Base}$ or stems from the underlying language and type system used. Flat ADTs are the standard case, and all mixin ADTs can be simplified into flat ones. This can be seen as a semantics in the sense that the language of flat and mixin ADT is translated to the language of flat ADTs. @@ -378,7 +398,7 @@ \subsection{Rigorous Definition} $\flt{A}$ arises by concatenating the fields of all $\flt{A_i}$ where duplicate field names are handled as follows: \begin{compactitem} \item if the same field (same name, types equal, definitions equal or both absent) occurs more than once, only the first occurrence is kept, - \item if the fields $c:T_1[=t_i]$ and $c:T_2[=t_2]$ occur for inequal types $T_i$, $A$ is ill-formed, + \item if the fields $c:T_1[=t_1]$ and $c:T_2[=t_2]$ occur for inequal types $T_i$, $A$ is ill-formed, \item if the fields $c:T=t_1$ and $c:T=t_2$ occur for inequal objects $t_i$, $A$ is ill-formed, \item if the fields $c:T=t$ and $c:T$ occur, only the defined one is kept $(\ast)$. \end{compactitem} @@ -391,12 +411,17 @@ \subsection{Rigorous Definition} In the simplest case, we forbid such forward references. Then ADTs are very well-behaved. -But we have a problem with the case $(\ast)$ in Def.~\ref{def:mixinflat}: if $c:T$ occurs before $c:T=t$, we cannot simply drop the former because intermediate fields may refer to $c$. +But we have a problem with the case $(\ast)$ in Def.~\ref{def:mixinflat}: if $c:T$ occurs before $c:T=t$, we cannot simply drop the former because intermediate fields before $c:T=t$ may refer to $c$. + A straightforward solution would be to declare the ADT to be ill-formed. -But unfortunately, this case is very important in practice --- it occurs whenever $c:T$ is declared in an abstract class and $c:T=t$ in a concrete class implementing it. +But unfortunately, this case is very important in practice --- it occurs whenever $c:T$ is declared in an abstract class and is overridden via $c:T=t$ in a concrete class implementing it. + +A more common solution is to allow fields to be not only backwards-referencing, but also forward-referencing, or in general to be mutually recursive. +This solves case $(\ast)$ since intermediate fields would --- after the removal of $c:T$ --- forward-reference $c:T = t$. +Indeed, many OO-languages allow a restricted form of mutual recursion, namely that methods of a class may be mutually recursive, or more precisely that method bodies may forward- or backward-reference other methods. +In ADT tonus, this translates to fields being allowed to forward-reference other fields in their definition; and \emph{only} there, i.e., types of fields can only backward-reference other fields as usual. -A more common solution is to allow the fields to be mutually recursive. -Consider a flat ADT with fields $\Gamma, c:T[=t], \Delta$ where $\Gamma$ and $\Delta$ are lists of fields. +Formally, we can capture this as follows. Consider a flat ADT with fields $\Gamma, c:T[=t], \Delta$ where $\Gamma$ and $\Delta$ are lists of fields. Let $\Gamma'$ and $\Delta'$ arise by dropping all definitions. Then we require that \begin{compactitem}