Skip to content

Commit c439180

Browse files
committed
Finished iteration chapter
1 parent 042828c commit c439180

File tree

5 files changed

+164
-8
lines changed

5 files changed

+164
-8
lines changed

bibliography/bibliography.bib

+41-2
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ @string { a_bright_jonathan
5959
@string { a_brucher_matthieu = "Matthieu Brucher" }
6060
@string { a_bruhin_florian = "Florian Bruhin" }
6161
@string { a_bruneau_pierrix = "Pierrick Bruneau" }
62+
@string { a_bucher_brand = "Brandt Bucher" }
6263
@string { a_burnett_magaret_m = "Margaret M.\ Burnett" }
6364
@string { a_burovski_evgeni = "Evgeni Burovski" }
6465
@string { a_carey_cj = "CJ Carey" }
@@ -287,8 +288,9 @@ @string { a_schurr_andy
287288
@string { a_scipy_1 = "{{SciPy~1.0 Contributors}}" }
288289
@string { a_scott_larkin_rigdway = "Larkin Ridgway Scott" }
289290
@string { a_setia_veenu = "Veenu Setia" }
290-
@string { a_shalev_shwartz_shai = "Shai Shalev{-}Shwartz" }
291+
@string { a_shafranovich_y = "Y.\ Shafranovich" }
291292
@string { a_shahrokni_ali = "Ali Shahrokni" }
293+
@string { a_shalev_shwartz_shai = "Shai Shalev{-}Shwartz" }
292294
@string { a_shen_kangshen = "Shen Kangshen" }
293295
@string { a_sheppart_kevin = "Kevin Sheppard" }
294296
@string { a_sigler_laurence_e = "Laurence E.\ Sigler" }
@@ -428,7 +430,7 @@ @string { l_usa_seattle
428430
@string { l_usa_sebastopol = "{{Sebastopol}, {CA}, {USA}}" }
429431
@string { l_usa_shelter_island = "{{Shelter Island}, {NY}, {USA}}" }
430432
@string { l_usa_south_san_francisco = "{{South San Francisco}, {CA}, {USA}}" }
431-
@string { l_usa_new_york = "{{New York}, {NY}, {USA}}" }
433+
@string { l_usa_wilmington = "{{Wilmington}, {DE}, {USA}}" }
432434
433435
434436
%% publishers
@@ -456,6 +458,7 @@ @string { p_hiroshima_university_dep_ie
456458
@string { p_iec = "{International Electrotechnical Commission~{(IEC)}}" }
457459
@string { p_ieee = "{Institute of Electrical and Electronics Engineers~{(IEEE)}}" }
458460
@string { p_ieee_computer_society = "{{IEEE}~Computer Society}" }
461+
@string { p_ietf = "{Internet Engineering Task Force~{(IETF)}}" }
459462
@string { p_infinite_skills = "Infinite Skills Inc" }
460463
@string { p_informs = "{The Institute for Operations Research and the Management Sciences~({INFORMS})}" }
461464
@string { p_iso = "{International Organization for Standardization~{(ISO)}}" }
@@ -529,6 +532,7 @@ @string { pa_iso_iec
529532
@string { pa_ieee = l_usa_piscataway }
530533
@string { pa_ieee_computer_society = l_usa_los_alamitos }
531534
@string { pa_ieee_ny = l_usa_new_york }
535+
@string { pa_ietf = l_usa_wilmington }
532536
@string { pa_infinite_skills = l_canada_oakville }
533537
@string { pa_informs = l_usa_catonsville }
534538
@string { pa_leibniz_zentrum_fur_informatik = l_germany_wadern }
@@ -792,6 +796,14 @@ @xdata{rep_nsotprocg
792796
address = l_china_beijing
793797
}
794798

799+
@xdata{rep_rfc,
800+
institution = p_ietf,
801+
address = pa_ietf,
802+
type = {Request for Comments~(RFC)},
803+
issn = {2070-1721}
804+
}
805+
806+
795807

796808
%% series
797809
@xdata{ser_bs,
@@ -1926,6 +1938,16 @@ @techreport{PEP612
19261938
urldate = {2024-10-09},
19271939
}
19281940

1941+
@techreport{PEP618,
1942+
author = a_bucher_brand,
1943+
title = {Add Optional Length-Checking To \pythonilIdx{zip}},
1944+
xdata = {rep_pep},
1945+
number = {618},
1946+
date = {2020-05-01},
1947+
url = {https://peps.python.org/pep-0618},
1948+
urldate = {2024-11-09}
1949+
}
1950+
19291951
@techreport{PEP635,
19301952
author = a_kohn_tobias # and # a_van_rossum_guido,
19311953
title = {Structural Pattern Matching:~{M}otivation and Rationale},
@@ -2082,6 +2104,13 @@ @inbook{PSF2024CAABCFC
20822104
urldate = {2024-08-22},
20832105
}
20842106

2107+
@inbook{PSF2024CCFRAW,
2108+
title = {\pythonilIdx{csv} -- \acrshort[hyper=true]{CSV} File Reading and Writing},
2109+
xdata = {PSF2024TPSL},
2110+
url = {https://docs.python.org/3/library/csv.html},
2111+
urldate = {2024-11-14},
2112+
}
2113+
20852114
@inbook{PSF2024CUFWSC,
20862115
title = {\pythonilIdx{contextlib} -- Utilities for \pythonilIdx{with}-Statement Contexts},
20872116
xdata = {PSF2024TPSL},
@@ -2358,6 +2387,16 @@ @article{S1998LHATFGAOCM
23582387
doi = {10.2307/2691200},
23592388
}
23602389

2390+
@techreport{S2005CFAMTFCSVCF,
2391+
author = a_shafranovich_y,
2392+
title = {Common Format and MIME Type for Comma-Separated Values~(\acrshort[hyper=true]{CSV}) Files},
2393+
xdata = {rep_rfc},
2394+
number = {4180},
2395+
date = {2005-10},
2396+
url = {https://datatracker.ietf.org/doc/html/rfc4180.html},
2397+
urldate = {2024-11-14}
2398+
}
2399+
23612400
@book{S2022FLAATIMEOLPBOC,
23622401
author = a_sigler_laurence_e,
23632402
title = {Fibonacci's Liber Abaci:~{A} Translation into Modern English of Leonardo Pisano's Book of Calculation},

book.tex

+15-1
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,24 @@
33
%
44
\setsecnumdepth{subsubsection}% number down to sub-sub-sections
55
%
6-
% prepare and set the book margins
6+
%% prepare and set the book margins
77
\setlrmarginsandblock{3cm}{3cm}{*}%
88
\setulmarginsandblock{2.5cm}{2.5cm}{*}%
99
\setlength{\parskip}{3pt}%
10+
%
11+
%% Allow more space per page to be consumed by floats.
12+
%% This is helpful when we later have larger programs as examples.
13+
\renewcommand{\topfraction}{.85}%
14+
\renewcommand{\bottomfraction}{.85}%
15+
\renewcommand{\textfraction}{.15}%
16+
\renewcommand{\floatpagefraction}{.8}%
17+
\renewcommand{\dbltopfraction}{.8}%
18+
\renewcommand{\dblfloatpagefraction}{.8}%
19+
\setcounter{topnumber}{9}%
20+
\setcounter{bottomnumber}{9}%
21+
\setcounter{totalnumber}{20}%
22+
\setcounter{dbltopnumber}{9}%
23+
%
1024
\checkandfixthelayout%
1125
%
1226
\usepackage{styles/styles}% import our own basic styles

notation/acronyms.sty

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
%%%
44
%
55
\newacronym[description={Artificial Intelligence, see, e.g.,~\cite{RN2022AIAMA}}]{AI}{AI}{Artificial Intelligence}%
6+
\newacronym[description={Comma-Separated Values, see, e.g.,~\cite{PSF2024CCFRAW,S2005CFAMTFCSVCF} and~\pgls{csv}}]{CSV}{CSV}{Comma-Separated Values}%
67
\newacronym[description={Data Science, see, e.g.,~\cite{G2019DSFSFPWP}}]{DS}{DS}{Data Science}%
78
\newacronym[description={Garbage In--Garbage Out, see, e.g.,~\cite{PKBCBR2005GIGOAELAOMBEUP}}]{GIGO}{GIGO}{Garbage In--Garbage Out}
89
\newacronym[description={Integrated Development Environment, see \gls{ide}}]{IDE}{IDE}{Integrated Development Environment}%

notation/terms.sty

+11
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,17 @@ See \cref{sec:howFloatingPointNumbersWork}.%
1414
}%
1515
%
1616
%
17+
\newglossaryentry{csv}{%
18+
name={CSV},%
19+
description={%
20+
A very common and simple text format for exchanging tabular or matrix data is \acrfull{CSV}~\cite{S2005CFAMTFCSVCF}. %
21+
Each row in the text file represents one row in the table or matrix. %
22+
The elements in the row are separated by a fixed delimiter, usually a comma~(\inQuotes{,}), sometimes a semicolon~(\inQuotes{;}). %
23+
\python\ offers some out-of-the-box \pgls{CSV} support in the \pythonilIdx{csv}~module~\cite{PSF2024CCFRAW}.%
24+
}%
25+
}%
26+
%
27+
%
1728
\newglossaryentry{docstring}{%
1829
name={docstring},%
1930
description={%

text/main/controlFlow/iteration/iteration.tex

+96-5
Original file line numberDiff line numberDiff line change
@@ -486,15 +486,16 @@
486486

487487
\gitPythonAndErrorOutput{\programmingWithPythonCodeRepo}{07_iteration}{generator_expressions_next_2.py}{--args format}{iteration:generator_expressions_next_2}{%
488488
An investigation of the lazy evaluation in generator expressions\pythonIdx{Generator} by using the \pythonilIdx{next} function.}%
489-
489+
\afterpage{\clearpage}%
490+
%
490491
To better understand this, let us try to construct a generator expression which explicitly tells us when an element is created.
491492
For this, we first create the function \pythonil{as_str} in \cref{lst:iteration:generator_expressions_next_2}.
492493
\pythonil{as_str} accepts an integer parameter \pythonil{a} as input.
493494
It immediately writes \pythonil{f"input is \{a\}"} to the \pgls{stdout}.
494495
This means that whenever \pythonil{as_str} is called, we will immediately see some output.
495496
Then, it returns simply the string representation of~\pythonil{a}, i.e., \pythonil{str(a)}\pythonIdx{str}.
496497

497-
Let us first use this function a list comprehension \pythonil{lst = [as_str(j) for j in range(3)]}.
498+
Let us first use this function in a list comprehension \pythonil{lst = [as_str(j) for j in range(3)]}.
498499
The list comprehension creates the entire list.
499500
This means in the moment this line of code is executed and \pythonil{lst} is created, our function~\pythonil{as_str} will be invoked three times, with \pythonil{a = 0}, \pythonil{a = 1}, and \pythonil{a = 2}.
500501
We can see that this happens when the list is created, because all the \pythonil{as_str}-output happens before the \pythonil{print("list created")} completes.
@@ -531,7 +532,7 @@
531532
\gitPythonAndOutput{\programmingWithPythonCodeRepo}{07_iteration}{generator_expressions_in_reduction.py}{--args format}{iteration:generator_expressions_in_reduction}{%
532533
The use of generator expressions in reducing functions, such as \pythonilIdx{sum}, \pythonilIdx{min}, \pythonilIdx{max}, \pythonilIdx{all}, and~\pythonilIdx{any}\pythonIdx{Generator}.}%
533534
%
534-
Generator expressions come in especially handy when we want to reduce a sequence of data to a single number.
535+
Generator expressions come in especially handy when we want to reduce or aggregate a sequence of data to a single number.
535536
\Cref{lst:iteration:generator_expressions_in_reduction} shows several examples for such computations.
536537

537538
First, we want to sum up the squares of all numbers from~0 to~999'999.
@@ -595,6 +596,8 @@
595596
%
596597
\begin{sloppypar}%
597598
Finally, generator expressions can also be passed to the constructors of collection datastructures or other functions that create such datastructures.
599+
Assume that we are processing numbers stored in a \pgls{CSV} format.
600+
Often, the rows of text files with tabular or matrix data are in this format.
598601
In \cref{lst:iteration:generator_expressions_to_collection}, we first define a string \pythonil{csv_text} with the value~\pythonil{"22,56,33,67,43,33,12"}.
599602
Invoking \pythonil{csv_text.split(",")}\pythonIdx{split}\pythonIdx{str!split} will split the string into a list of single strings based on the delimiter~\pythonil{","}.
600603
This will thus yield \pythonil{["22", "56", "33", "67", "43", "33", "12"]}.
@@ -731,11 +734,99 @@
731734
Different from \pythonil{takewhile}, the new \pythonilIdx{Iterator} created by \pythonilIdx{filter} does not stop if the predicate returns~\pythonil{False}.
732735
However, it returns only those elements for which the predicate returned~\pythonil{True}.
733736
In \cref{lst:iteration:filter_takewhile}, we use this to select prime numbers~$x$ for which an integer~$y$ exists such that~$x=y^2+1$.
734-
This time, we implement the predicate as function \pythonil{sqr_plus_1} and pass this function to \pythonilIdx{filter}.
737+
This time, we implement the predicate as function \pythonil{is_sqr_plus_1} and pass this function to \pythonilIdx{filter}.
735738
Since there are again probably infinitely many such prime numbers, we return only those that are less than~1000, for which we use~\pythonil{takewhile}.
736-
The results can be seen in \cref{exec:iteration:filter_takewhile}.
739+
The results can be seen in \cref{exec:iteration:filter_takewhile}:
740+
There are ten such primes.
741+
The smallest one is $1^2+1=2$ and the largest one is~$26^2+1=677$.
742+
743+
\gitPythonAndOutput{\programmingWithPythonCodeRepo}{07_iteration}{map.py}{--args format}{iteration:map}{%
744+
An example for the function \pythonilIdx{map}.}%
745+
%
746+
Another important utility function when dealing with sequences is the function~\pythonilIdx{map}.
747+
We explore its use in \cref{lst:iteration:map}.
748+
Back in \cref{lst:iteration:generator_expressions_to_collection}, we used a generator expression to process data that we extracted from a \pgls{CSV}-formatted string.
749+
Instead of doing \pythonil{int(s) for s in csv_text.split(",")} we can simply write \pythonil{map(int, csv_text.split(","))}.
750+
The first argument to \pythonilIdx{map} is a function that should be applied to all of the elements in the sequence passed in as its second argument.
751+
The result of \pythonilIdx{map} is a new sequence with the return values of this function.
752+
In \cref{lst:iteration:map}, we map the string \pythonil{csv_text} split at all~\pythonil{","} to \pythonilsIdx{int} and then \pythonilIdx{filter} the sequence to retain only values greater than~20.
753+
We can conveniently iterate over the resulting filtered and mapped sequence using a~\pythonilIdx{for}~loop.
754+
755+
How about we now obtain all the unique squares of the values in the CSV data, i.e., we discard all duplicate squares.
756+
First, we again use \pythonilIdx{split}\pythonIdx{str!split} to divide the text into chunks based on the separator~\pythonil{","}.
757+
Then we map these chunks to integers and return their squares using the \pythonilIdx{map}~function, but this time provide a~\pythonilIdx{lambda} that does the transformation.
758+
Now we want to retain only the unique values.
759+
This can be done by passing the resulting \pythonilIdx{Iterator} into the \pythonilIdx{set} constructor.
760+
A set, by definition, only contains unique values.
761+
In the resulting output in \cref{exec:iteration:map}, we can see that \textil{9}~indeed only appears once and so does~\textil{144}.
762+
763+
Finally, the \pythonilIdx{map} function also plays nicely together with aggregating functions like~\pythonilIdx{sum}, \pythonilIdx{min}, or~\pythonilIdx{max}.
764+
In the final example for \pythonilIdx{map}, we have a list of words~\pythonil{words} and want to know the length of the longest word.
765+
We can first map each word to its length via~\pythonil{map(len, words)}.
766+
This produces an \pythonilIdx{Iterator} of word lengths, which we can directly pass to~\pythonilIdx{max}.
767+
768+
Notice that \pythonilIdx{map} does not generate a data structure with all the transformed elements in memory.
769+
Instead, the elements are constructed as needed (and thereafter disposed of by the garbage collector when no longer needed).
770+
This makes \pythonilIdx{map} an elegant and efficient approach to transforming sequences of data.
771+
772+
\gitPython{\programmingWithPythonCodeRepo}{07_iteration/zip.py}{--args format}{iteration:zip}{%
773+
An example for the function \pythonilIdx{zip}.}%
774+
%
775+
\gitOutputTool{\programmingWithPythonCodeRepo}{.}{scripts/pytest_doctest.sh 07_iteration zip.py}{iteration:zip:doctest}{%
776+
The output of \pytest\ executing the \pglspl{doctest} for the \pythonilIdx{zip} example from \cref{lst:iteration:zip}.}%
777+
%
778+
\begin{sloppypar}%
779+
As the last example for sequence processing, we play a bit with the \pythonilIdx{zip} function.
780+
This function accepts several \pythonilsIdx{Iterable} as arguments and returns a new \pythonilIdx{Iterator} which steps through all of the input iterables in sync, returning tuples with one value from each of them.
781+
For example, \pythonil{zip([1, 2, 3], ["a", "b", "c"])} returns an \pythonilIdx{Iterator} that produces the sequence~\pythonil{(1, "a")}, \pythonil{(2, "b")}, and \pythonil{(3, "c")}.
782+
Sometimes, the input \pythonilsIdx{Iterable} may be of different lengths.
783+
To make sure that such an error is properly reported with a~\pythonilIdx{ValueError}, we must always supply the named argument~\pythonil{strict=True}~\cite{PEP618}.%
784+
\end{sloppypar}%
785+
%
786+
In \cref{lst:iteration:zip}, we use \pythonilIdx{zip} to implement a function \pythonil{distance} that computes the Euclidean distance of two $n$\nobreakdashes-dimensional vectors or points~\pythonil{p1} and~\pythonil{p2}.
787+
The two points are supplied as \pythonilsIdx{Iterable} of either \pythonil{float} or \pythonil{int}.
788+
We could, for example, provide them as \pythonils{list}.
789+
The Euclidean distance is defined as%
790+
%
791+
\begin{equation}%
792+
\pythonil{distance(p1, p2)} = \sqrt{\sum_{i=1}^n (\pythonil{p1}_i - \pythonil{p2}_i)^2}%
793+
\label{eq:euclideanDistance}%
794+
\end{equation}%
795+
%
796+
This means that we need to iterate over both points in lockstep.
797+
This is exactly what \pythonilIdx{zip} does.
798+
If both points were provided as~\pythonils{list}, then \pythonil{zip(p1, p2, strict=True)} will, step by step, give us the tuples~\pythonil{(p1[0], p2[0])}, \pythonil{(p1[1], p2[1])}, {\dots}, until reaching the ends of the lists.
799+
We can now write the generator expression~\pythonil{(a - b) ** 2 for a, b in zip(p1, p2, strict=True)}.
800+
It uses tuple expansion to extract the two elements~\pythonil{a} and \pythonil{b} from each of the tuples that \pythonilIdx{zip} creates.
801+
It then computes the square of the difference of these two elements.
802+
By passing the generator expression to the~\pythonilIdx{sum} function as-is, we can get the sum of these squares.
803+
Finally, the \pythonilIdx{sqrt} function from the \pythonilIdx{math}~module completes the computation of the Euclidean distance as prescribed in \cref{eq:euclideanDistance}.
804+
805+
Instead of testing this new function~\pythonil{distance} with a small example program, we do so with \pglspl{doctest}.
806+
The \pgls{doctest} shows that the expected distance of two identical vectors with the same value~\pythonil{[1, 1]} should be~\pythonil{0.0}.
807+
\pythonil{distance((0.0, 1.0, 2.0, 3.0), (1.0, 2.0, 3.0, 4.0))}, which basically is~$\sqrt{1 + 1 + 1 + 1}$, should be~\pythonil{2.0}.
808+
The distance of the two one-dimensional vectors~\pythonil{[100]} and~\pythonil{[10]} should be~\pythonil{90.0}.
809+
If we, however, pass in two vectors with different dimensions, this should result in a~\pythonilIdx{ValueError}.
810+
The output of \pytest\ in \cref{exec:iteration:zip:doctest} shows that the example cases all return their expected results.
811+
812+
This concludes our treatment of operations on \pythonilsIdx{Iterator}.
813+
We could only scratch the surface here.
814+
The module~\pythonilIdx{itertools}~\cite{PSF2024IFCIFEL} which ships with \python\ offers many more useful functions.
815+
However, an understanding of the principles of \pythonilIdx{map}, \pythonilIdx{filter}, and~\pythonilIdx{zip} will enable the reader to explore these tools by themselves.%
737816
\FloatBarrier%
738817
\endhsection%
739818
%
819+
%
820+
\hsection{Summary}%
821+
Working with sequences is a very important aspect of \python\ programming.
822+
The programming language provides a simplified syntax for working with loops in the form of list, set, and dict comprehensions.
823+
Different from comprehensions, generator expressions allow us to provide sequences of data that can be processed without storing all elements in memory first or at once.
824+
Instead, the elements are created when needed.
825+
If this creation of elements is more complicated than what simple generator expressions can, well, express, we can use generator functions.
826+
With their \pythonilIdx{yield} statement, they allow us to write functions that perform a computation, pass the result to their output, allow other code outside to process the result, and then resume with the generation of the next element.
827+
Finally, sequences of data can be processed by aggregating and transforming functions.
828+
These functions can process containers, comprehensions, generator expressions, and generators alike.%
829+
\endhsection%
830+
%
740831
\endhsection%
741832
%

0 commit comments

Comments
 (0)