Skip to content

Commit 0a54bf6

Browse files
committed
first steps to static type checking using mypy
1 parent 468010b commit 0a54bf6

File tree

7 files changed

+125
-21
lines changed

7 files changed

+125
-21
lines changed

bibliography/bibliography.bib

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ @string { a_burovski_evgeni
5050
@string { a_carey_cj = "CJ Carey" }
5151
@string { a_castro_oscar = "Oscar Castro" }
5252
@string { a_chanan_gregory = "Gregory Chanan" }
53+
@string { a_chen_boqi = "Boqi Chen" }
5354
@string { a_chen_jianmin = "Jianmin Chen" }
5455
@string { a_chen_zhifeng = "Zhifeng Chen" }
5556
@string { a_chen_zhi = "Zhi Chen" }
@@ -114,6 +115,7 @@ @string { a_jones_eric
114115
@string { a_katz_daniel_s = "Daniel S.\ Katz" }
115116
@string { a_keeton_kimberly = "Kimberly Keeton" }
116117
@string { a_kern_robert = "Robert Kern" }
118+
@string { a_khan_faizan = "Faizan Khan" }
117119
@string { a_killeen_trevor = "Trevor Killeen" }
118120
@string { a_klopf_andreas = "Andreas K{\"o}pf" }
119121
@string { a_konovalov_alexander = "Alexander Konovalov" }
@@ -136,6 +138,7 @@ @string { a_mann_samuel
136138
@string { a_massa_francisco = "Francisco Massa" }
137139
@string { a_maxwell_aaron = "Aaron Maxwell" }
138140
@string { a_mayorov_nikolay = "Nikolay Mayorov" }
141+
@string { a_mcintosh_shane = "Shane McIntosh" }
139142
@string { a_michel_vincent = "Vincent Michel" }
140143
@string { a_millman_jarrod_k = "K.\ Jarrod Millman" }
141144
@string { a_mirjalili_vahid = "Vahid Mirjalili" }
@@ -209,6 +212,7 @@ @string { a_van_mulbregt_paul
209212
@string { a_van_rossum_guido = "Guido {van Rossum}" }
210213
@string { a_van_rysdam = "Peter {van Rysdam}" }
211214
@string { a_varoquaux_gael = "Ga{\"e}l Varoquaux" }
215+
@string { a_varro_daniel = "D{\'a}niel Varr{\'o}" }
212216
@string { a_vasudevan_vijay = "Vijay Vasudevan" }
213217
@string { a_vetterling_william_t = "William T.\ Vetterling" }
214218
@string { a_virtanen_pauli = "Pauli Virtanen" }
@@ -301,7 +305,7 @@ @string { p_packt
301305
@string { p_pearson_education = "Pearson Education, Inc." }
302306
@string { p_microsoft_press = "{Microsoft Press}, " # p_pearson_education }
303307
@string { p_plos = "Public Library of Science~{(PLOS)}" }
304-
@string { p_python_software_foundation = "{Python Software Foundation~{(PSF)}}" }
308+
@string { p_python_software_foundation = "{\python\ Software Foundation~{(PSF)}}" }
305309
@string { p_springer = "{Springer}" }
306310
@string { p_springer_nature_limited = "{Springer Nature Limited}" }
307311
@string { p_springer_new_york = "{Springer New York}" }
@@ -401,6 +405,20 @@ @xdata{j_ijoc
401405
address = pa_informs
402406
}
403407

408+
@xdata{j_itose,
409+
journal = {{IEEE} Transactions on Software Engineering},
410+
publisher = p_ieee,
411+
address = pa_ieee,
412+
issn = {0098-5589},
413+
}
414+
415+
@xdata{j_jomlr,
416+
journal = {Journal of Machine Learning Research~{(JMLR)}},
417+
issn = {1532-4435},
418+
publisher = p_mit_press,
419+
address = pa_mit_press,
420+
}
421+
404422
@xdata{j_lum,
405423
journal = {;login: Usenix Magazine},
406424
publisher = p_usenix,
@@ -415,20 +433,6 @@ @xdata{j_mm
415433
issn = {0025-570X},
416434
}
417435

418-
@xdata{j_jomlr,
419-
journal = {Journal of Machine Learning Research~{(JMLR)}},
420-
issn = {1532-4435},
421-
publisher = p_mit_press,
422-
address = pa_mit_press,
423-
}
424-
425-
@xdata{j_oscdo,
426-
journal = {Ontology Studies (Cuadernos de Ontolog{\'i}a)},
427-
issn = {1576-2270},
428-
publisher = p_universidad_del_pais_vasco,
429-
address = pa_universidad_del_pais_vasco,
430-
}
431-
432436
@xdata{j_n,
433437
journal = {Nature},
434438
issn = {0028-0836},
@@ -450,6 +454,13 @@ @xdata{j_notams
450454
issn = {1088-9477}
451455
}
452456

457+
@xdata{j_oscdo,
458+
journal = {Ontology Studies (Cuadernos de Ontolog{\'i}a)},
459+
issn = {1576-2270},
460+
publisher = p_universidad_del_pais_vasco,
461+
address = pa_universidad_del_pais_vasco,
462+
}
463+
453464
@xdata{j_pcb,
454465
journal = {{PLOS} Computational Biology},
455466
issn = {1553-7358},
@@ -491,7 +502,7 @@ @xdata{rep_ieee_standard
491502
}
492503

493504
@xdata{rep_pep,
494-
type = {Python Enhancement Proposal~{(PEP)}},
505+
type = {\python\ Enhancement Proposal~{(PEP)}},
495506
institution = p_python_software_foundation,
496507
address = pa_python_software_foundation
497508
}
@@ -745,7 +756,7 @@ @inbook{F2011TTOEAP
745756

746757
@book{G2019DSFSFPWP,
747758
author = a_grus_joel,
748-
title = {Data Science from Scratch: First Principles with Python},
759+
title = {Data Science from Scratch: First Principles with \python},
749760
edition = {2},
750761
date = {2019-05},
751762
publisher = p_oreilly,
@@ -876,6 +887,19 @@ @book{J2018NPSCADSAWNSAM
876887
isbn = {9781484242469}
877888
}
878889

890+
@article{KCVM2022AESOTRDIPP,
891+
author = a_khan_faizan # and # a_chen_boqi # and # a_varro_daniel # and # a_mcintosh_shane,
892+
title = {An Empirical Study of Type-Related Defects in \python\ Projects},
893+
xdata = {j_itose},
894+
volume = {48},
895+
number = {8},
896+
pages = {3145--3158},
897+
date = {2022-08-01},
898+
doi = {10.1109/TSE.2021.3082068},
899+
url = {https://www.researchgate.net/publication/351729684},
900+
urldate = {2024-08-16}
901+
}
902+
879903
@book{L2023TDDBTADMLMWT,
880904
author = a_landau_charles,
881905
title = {\tensorflow\ Deep Dive: Build, Train, and Deploy Machine Learning Models with \tensorflow},
@@ -959,7 +983,7 @@ @book{P2021HOMLPAVWP
959983
}
960984

961985
@techreport{PEP8,
962-
title = {Style Guide for Python Code},
986+
title = {Style Guide for \python\ Code},
963987
author = a_van_rossum_guido # and # a_warsaw_barry # and # a_coghlan_alyssa,
964988
xdata = {rep_pep},
965989
number = {8},

scripts/filterPdf.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ do
3737
tempFileDst="$(mktemp --tmpdir="$tempDir")"
3838
echo "$(date +'%0Y-%0m-%0d %0R:%0S'): Now beginning filter cycle $cycle with destination '$tempFileDst'."
3939

40-
echo "$(date +'%0Y-%0m-%0d %0R:%0S'): We ghostscript to filter '$tempFileSrc' to '$tempFileDst'."
40+
echo "$(date +'%0Y-%0m-%0d %0R:%0S'): We use ghostscript to filter '$tempFileSrc' to '$tempFileDst'."
4141
gs -dAntiAliasColorImages=true \
4242
-dAntiAliasGrayImages=true \
4343
-dAntiAliasMonoImages=true \

styles/listing.sty

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,8 @@ emph={[7]},%
122122
emphstyle={[7]\@lstbasicstyle},%
123123
emph={[8]},%
124124
emphstyle={[8]\@lstbasicstyle},%
125+
breaklines=true,%
126+
postbreak=\mbox{\textcolor{red}{$\hookrightarrow$}\space},%
125127
%
126128
literate=%
127129
{á}{{\'a}}1 {é}{{\'e}}1 {í}{{\'i}}1 {ó}{{\'o}}1 {ú}{{\'u}}1%
@@ -220,8 +222,8 @@ literate=%
220222
%% #5 the caption
221223
\protected\gdef\gitOutput#1#2#3#4#5{%
222224
\gitExec{#1}{#2}{#3}%
223-
\expandafter\expandafter\expandafter\edef\expandafter\csname @pwp@gitFile:exec:#5\endcsname{\gitFile}%
224-
\lstinputlisting[float,style=text_style,label={exec:#4},caption={#5}]{\csname @pwp@gitFile:exec:#5\endcsname}%
225+
\expandafter\expandafter\expandafter\edef\expandafter\csname @pwp@gitFile:exec:#4\endcsname{\gitFile}%
226+
\lstinputlisting[float,style=text_style,label={exec:#4},caption={#5}]{\csname @pwp@gitFile:exec:#4\endcsname}%
225227
}%
226228
%
227229
%% Use latexgit to place a listing with code and the program output.

text/main/basics/simpleDataTypesAndOperations/simpleDataTypesAndOperations.tex

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
\hsection{Simple Datatypes and Operations}%
2+
\label{sec:simplyDataTypesAndOperations}%
23
%
34
\hinput{introduction}{introduction.tex}%
45
\hinput{int}{int.tex}%

text/main/basics/variables/multiAndSwap/multiAndSwap.tex

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,6 @@
2525
\pythonil{x, y, z = z, y, x} assigns the present value of \pythonil{z} to become the new value of \pythonil{x}, the present value of \pythonil{y} to also be the new value of \pythonil{y}, and the present value of \pythonil{x} to become the new value of \pythonil{z}.
2626
\pythonil{print(f"x={x}, y={y}, z={z}")} now gives us \textil{x=1, y=2, z=3}.
2727
%
28+
\FloatBarrier%
2829
\endhsection%
2930
%
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
\hsection{Variable Types and Type Hints}%
2+
%
3+
\hsection{Variable Types}%
4+
\gitPythonAndOutput{\programmingWithPythonCodeRepo}{01_variables}{variable_types.py}{--args format}{variables:types}{%
5+
An example for the types of variables.}%
6+
%
7+
A variable is basically a name pointing to an object.
8+
Each object has a type and we already learned about several of these datatypes in \cref{sec:simplyDataTypesAndOperations}.
9+
We can obtain the type of an object stored in variable \pythonil{var} by invoking \pythonil{type(var)}\pythonIdx{type}.
10+
\Cref{lst:variables:types} shows a program that does just that and the output of that program is given in \cref{exec:variables:types}.
11+
It is obvious that the type of a variable that holds an integer value is \pythonilIdx{int}, the type of a variable that holds a floating point number is \pythonilIdx{float}, and so on.
12+
There really is not much to say about that.%
13+
\endhsection%
14+
%
15+
\hsection{Types and Confusion}%
16+
\gitPythonAndOutput{\programmingWithPythonCodeRepo}{01_variables}{variable_types_wrong.py}{--args format}{variables:types_wrong}{%
17+
An example for the confusing variable types.}
18+
19+
Well, actually, there is.
20+
You see, when you declare a variable in a language like \texttt{C}, you have to specify its \emph{type}.
21+
You are then permitted to only assign values that have exactly this type to the variable.
22+
In \python, you do not need to specify a type and you can assign whatever you want to a variable.
23+
This has the advantage that the code is shorter (because you do not need to write the type), looks more elegant, and programming becomes easier.
24+
At first glance.
25+
However, there are also problems.
26+
Let's take a look at \cref{lst:variables:types_wrong}.
27+
We declare a variable named \pythonil{int_var} and store the integer~\pythonil{8} in it.
28+
Then we update \pythonil{int_var} by computing \pythonil{int_var = int_var / 3}.
29+
Back in \cref{sec:int}, you learned that the \pythonilIdx{//} operator performs an integer division with an \pythonil{int} result, whereas the division using the \pythonilIdx{/} operator always returns a \pythonil{float}.
30+
This means that our variable \pythonil{int_var} now contains a \pythonil{float}, which is also visible in the output in \cref{exec:variables:types_wrong}.
31+
32+
From the perspective of \python, this is totally fine.
33+
The program executes and the output appears without error.
34+
However, from the perspective of programming, \cref{lst:variables:types_wrong} is \emph{wrong}.
35+
Imagine that this was not just some random example without meaning.
36+
Imagine that this was a part of a really useful program.
37+
Imagine that you got this program from some source and try to understand it.
38+
If you read this program, then you find that a variable named \pythonil{int_var} contains a \pythonil{float}.
39+
This is not forbidden, but when reading the code, it must strike you as odd.
40+
41+
Indeed, there are at least two possible explanations for this:
42+
Either the original author of this code mistakenly used the \pythonilIdx{/} operator instead of the \pythonilIdx{//} operator.
43+
Maybe they wanted to do an integer division and accidentally did a floating point division.
44+
Depending on what the code later on (in our imaginary larger program) does, it could be very hard to find such an error.
45+
46+
Or maybe the author fully intended to do a floating point division and expected a \pythonil{float} to be stored in \pythonil{int_var}, but chose a misleading name.
47+
Choosing this name, however, can be very dangerous:
48+
What if another programmer continues to work on this code and, based on the variable's name, expects it to contain an \pythonil{int} whereas it actually contains a \pythonil{float}?
49+
This could again lead to all sorts of strange errors later on in her code.
50+
51+
Regardless of what is true, you will certainly agree that something is wrong with this program.
52+
And since you are not the author of the program, you do not know what is wrong.
53+
This code will cause some problem down the line.
54+
Many such problems exist in many software projects and they indeed are hard to find~\cite{KCVM2022AESOTRDIPP}.
55+
So here, the leniency of \python\ in allowing us to not specify types comes back to bite us.
56+
57+
Luckily, there are two things that we can do to prevent such situations:%
58+
\begin{enumerate}%
59+
\item Use static type checking tools to find such potential errors in our code.%
60+
\item Use type hints to annotate variables with their types.%
61+
\end{enumerate}%
62+
And if you are in one of my classes, you better do both.
63+
And now we will learn how to do that.%
64+
\endhsection%
65+
%
66+
\hsection{Static Type Checking}%
67+
\gitOutput{\programmingWithPythonCodeRepo}{.}{scripts/mypy.sh 01_variables variable_types_wrong.py}{variables:variable_types_wrong:mypy}{%
68+
The static type checking results of the program given in \cref{lst:variables:types_wrong}.}%
69+
\gitOutput{\programmingWithPythonCodeRepo}{.}{scripts/mypy.sh 01_variables variable_types.py}{variables:variable_types:mypy}{%
70+
The static type checking results of the program given in \cref{lst:variables:types}.}%
71+
%
72+
\endhsection%
73+
\FloatBarrier%
74+
\endhsection%
75+
%

text/main/basics/variables/variables.tex

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
%
1010
\hinput{assignment}{assignment.tex}%
1111
\hinput{multiAndSwap}{multiAndSwap.tex}%
12+
\hinput{typesAndTypeHints}{typesAndTypeHints.tex}%
1213
%
1314
\endhsection%
1415
%

0 commit comments

Comments
 (0)