Skip to content

Commit 093b690

Browse files
committed
basic string operations added
1 parent ce3f81b commit 093b690

File tree

7 files changed

+174
-10
lines changed

7 files changed

+174
-10
lines changed

bibliography/bibliography.bib

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ @string { a_liu_yuxi
123123
@string { a_loy_marc = "Marc Loy" }
124124
@string { a_mann_samuel = "Samuel Mann" }
125125
@string { a_massa_francisco = "Francisco Massa" }
126+
@string { a_maxwell_aaron = "Aaron Maxwell" }
126127
@string { a_mayorov_nikolay = "Nikolay Mayorov" }
127128
@string { a_michel_vincent = "Vincent Michel" }
128129
@string { a_millman_jarrod_k = "K.\ Jarrod Millman" }
@@ -214,6 +215,7 @@ @string { a_zinoviev_dimitry
214215

215216
%% locations
216217
@string { l_australia_hobart = "{{Hobart}, {TAS}, {Australia}}" }
218+
@string { l_canada_oakville = "{{Oakville}, {ON}, {Canada}}" }
217219
@string { l_canada_vancouver = "{{Vancouver}, {BC}, {Canada}}" }
218220
@string { l_portugal_lisbon = "{{Lisbon}, {Portugal}}" }
219221
@string { l_switzerland_cham = "{{Cham}, {Switzerland}}" }
@@ -255,9 +257,10 @@ @string { p_cornell_university_library
255257
@string { p_github = "{{GitHub} Inc}" }
256258
@string { p_iec = "{International Electrotechnical Commission~{(IEC)}}" }
257259
@string { p_ieee = "{Institute of Electrical and Electronics Engineers~{(IEEE)}}" }
260+
@string { p_infinite_skills = "Infinite Skills Inc" }
261+
@string { p_informs = "{The Institute for Operations Research and the Management Sciences~({INFORMS})}" }
258262
@string { p_iso = "{International Organization for Standardization~{(ISO)}}" }
259263
@string { p_iso_iec = "{International Organization for Standardization~{(ISO)} / International Electrotechnical Commission~{(IEC)}}" }
260-
@string { p_informs = "{The Institute for Operations Research and the Management Sciences~({INFORMS})}" }
261264
@string { p_manning_publications = "{Manning Publications}" }
262265
@string { p_microsoft_press = "{Microsoft Press}, " # p_pearson_education }
263266
@string { p_mit_press = "{{MIT} Press}" }
@@ -290,6 +293,7 @@ @string { pa_github
290293
@string { pa_iso_iec = l_switzerland_geneva }
291294
@string { pa_ieee = l_usa_piscataway }
292295
@string { pa_ieee_ny = l_usa_new_york }
296+
@string { pa_infinite_skills = l_canada_oakville }
293297
@string { pa_informs = l_usa_catonsville }
294298
@string { pa_manning_publications = l_usa_shelter_island }
295299
@string { pa_microsoft_press = pa_pearson_education }
@@ -732,6 +736,15 @@ @book{LNL2020LJ
732736
isbn = {9781492056270},
733737
}
734738

739+
@book{M2017WAFSIPAHCIUT,
740+
author = a_maxwell_aaron,
741+
title = {What are f-strings in \python\ and how can I use them?},
742+
publisher = p_infinite_skills,
743+
address = pa_infinite_skills,
744+
date = {2017-06},
745+
isbn = {9781491994863}
746+
}
747+
735748
@article{N1939TTOP,
736749
author = a_niven_ivan,
737750
title = {The Transcendence of~$\pi$},
@@ -847,6 +860,13 @@ @inbook{PSF2024PSAU
847860
urldate = {2024-07-05},
848861
}
849862

863+
@inbook{PSF2024TSTS,
864+
title = {Text Sequence Type -- \pythonilIdx{str}},
865+
crossref = {PSF2024P3D},
866+
url = {https://docs.python.org/3/library/stdtypes.html#textseq},
867+
urldate = {2024-07-25},
868+
}
869+
850870
@article{PVGMTGBPWDVPCBPD2011SMLIP,
851871
author = a_pedregosa_fabian # and # a_varoquaux_gael # and # a_gramfort_alexandre # and # a_michel_vincent # and # a_thirion_bertrand # and # a_grisel_olivier # and # a_blondel_mathieu # and # a_prettenhofer_peter # and # a_weiss_ron # and # a_dubourg_vincent # and # a_vanderplas_jake # and # a_passos_alexandre # and # a_cornapeau_david # and # a_brucher_matthieu # and # a_perrot_matthieu # and # a_duchesnay_edouard,
852872
title = {\scikitlearn: Machine Learning in \python},

text/main/basics/simpleDataTypesAndOperations/bool/bool.tex

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
\pythonil{6 < 6}\pythonIdx{<} is also \pythonilIdx{False} while \pythonil{6 <= 6} is, of course, \pythonilIdx{True}.
3838
While \pythonil{5 > 6}\pythonIdx{>} is not \pythonilIdx{True}, \pythonil{6 > 5}\pythonIdx{>} is.
3939
It is also possible to compare floating point numbers with integers and vice versa.
40-
\pythonil{5.5 == 5}\pythonIdx{==} is \pythonilIdx{False}, while \pythonilIdx{5.0 == 5} is \pythonilIdx{True}.
40+
\pythonil{5.5 == 5}\pythonIdx{==} is \pythonilIdx{False}, while \pythonil{5.0 == 5} is \pythonilIdx{True}.
4141

4242
Comparisons can also be chained:
4343
\pythonil{3 < 4 < 5 < 6} is \pythonilIdx{True}, because \pythonil{3 < 4} and \pythonil{4 < 5} and \pythonil{5 < 6}.
@@ -59,7 +59,7 @@
5959
\centering%
6060
%
6161
\subfloat[][%
62-
The truth table for the logical conjunction (\emph{logical and}\pythonIdx{and}): \pythonil{a and b}.%
62+
The truth table for the logical conjunction\pythonIdx{bool!conjunction} (\emph{logical and}\pythonIdx{and}): \pythonil{a and b}.%
6363
\label{fig:booleanAnd}%
6464
]{%
6565
~~~~%
@@ -82,7 +82,7 @@
8282
\strut\hfill\strut%
8383
%
8484
\subfloat[][%
85-
The truth table for the logical disjunction (\emph{logical or})\pythonIdx{or}: \pythonil{a or b}.%
85+
The truth table for the logical disjunction\pythonIdx{bool!disjunction} (\emph{logical or})\pythonIdx{or}: \pythonil{a or b}.%
8686
\label{fig:booleanOr}%
8787
]{%
8888
~~~~%
@@ -105,7 +105,7 @@
105105
\strut\hfill\strut%
106106
%
107107
\subfloat[][%
108-
The truth table for the logical negation (\emph{logical not}\pythonIdx{not}): \pythonil{not a}.%
108+
The truth table for the logical negation\pythonIdx{bool!negation} (\emph{logical not}\pythonIdx{not}): \pythonil{not a}.%
109109
\label{fig:booleanNot}%
110110
]{%
111111
~~~~%
@@ -137,11 +137,11 @@
137137
%
138138
\begin{itemize}%
139139
%
140-
\item A Boolean conjunction, i.e., \pythonilIdx{and}, is \pythonilIdx{True} if and only both of its operands are also \pythonilIdx{True} and \pythonilIdx{False} otherwise, as shown in \cref{fig:booleanAnd}.%
140+
\item A Boolean conjunction\pythonIdx{bool!conjunction}, i.e., \pythonilIdx{and}, is \pythonilIdx{True} if and only if both of its operands are also \pythonilIdx{True} and \pythonilIdx{False} otherwise, as shown in \cref{fig:booleanAnd}.%
141141
%
142-
\item A Boolean disjunction, i.e., \pythonilIdx{and}, is \pythonilIdx{True} if at least one of its two operands is \pythonilIdx{True} and \pythonilIdx{False} otherwise, as shown in \cref{fig:booleanOr}.%
142+
\item A Boolean disjunction\pythonIdx{bool!disjunction}, i.e., \pythonilIdx{or}, is \pythonilIdx{True} if at least one of its two operands is \pythonilIdx{True} and \pythonilIdx{False} otherwise, as shown in \cref{fig:booleanOr}.%
143143
%
144-
\item The Boolean negation, i.e., \pythonilIdx{not}, is \pythonilIdx{True} if its operand is \pythonilIdx{False}. %
144+
\item The Boolean negation\pythonIdx{bool!negation}, i.e., \pythonilIdx{not}, is \pythonilIdx{True} if its operand is \pythonilIdx{False}. %
145145
Otherwise, it is \pythonilIdx{False}, as shown in \cref{fig:booleanNot}.%
146146
%
147147
\end{itemize}%

text/main/basics/simpleDataTypesAndOperations/introduction/introduction.tex

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@
4141
\item \pythonilIdx{int}: the integer datatype, which represents integer numbers~\integerNumbers~(\cref{sec:int}),%
4242
\item \pythonilIdx{float}: the floating point numbers, i.e., a subset of the real numbers~\realNumbers~(\cref{sec:float}),%
4343
\item \pythonilIdx{bool}: Boolean values, which can be either \pythonilIdx{True} or \pythonilIdx{False}~(\cref{sec:bool}),%
44-
\item \pythonilIdx{str}: strings, i.e., portions of text of arbitrary length, and.%
45-
\item \pythonilIdx{None}: nothing, which is the result of any command that does not explicilty return a value.%
44+
\item \pythonilIdx{str}: strings, i.e., portions of text of arbitrary length~(\cref{sec:str}), and%
45+
\item \pythonilIdx{None}: nothing, which is the result of any command that does not explicitly return a value.%
4646
%
4747
\end{itemize}%
4848
%

text/main/basics/simpleDataTypesAndOperations/simpleDataTypesAndOperations.tex

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
\hinput{int}{int.tex}%
55
\hinput{float}{float.tex}%
66
\hinput{bool}{bool.tex}%
7+
\hinput{str}{str.tex}%
78
%
89
\endhsection%
910
%
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
\hsection{Text Strings}%
2+
\label{sec:str}%
3+
%
4+
The fourth important datatype in \python\ is the text string.
5+
Text strings are sequences of characters of an arbitrary length.
6+
In \python, they are represented by the datatype \pythonilIdx{str}.
7+
Indeed, we have already used it before, even back in our very first example program, which simply printed \pythonil{"Hello World"}, in \cref{lst:very_first_program} in \cref{sec:ourFirstProgram}.
8+
\pythonil{"Hello World"} is such a text string.%
9+
%
10+
\hsection{Basic String Operations}%
11+
%
12+
\begin{figure}%
13+
\centering%
14+
\includegraphics[width=0.8\linewidth]{\currentDir/strIndexing}%
15+
\caption{Specifying string literals and indexing their characters.}%
16+
\label{fig:strIndexing}%
17+
\end{figure}%
18+
%
19+
As \cref{fig:strIndexing} shows, there are two basic ways to specify a text string literal\pythonIdx{str!literal}:
20+
Either enclosed by double quotes, e.g., \pythonil{"Hello World!"}\pythonIdx{\textquotedbl} or enclosed by single quotes, e.g., \pythonil{'Hello World!'}\pythonIdx{\textquotesingle}.
21+
The double-quote variant is usually preferred and we should always use it in our programs.
22+
The quotation marks are only used to delimit the strings, i.e., to tell \python\ where the string begins or ends.
23+
They are not themselves part of the string.
24+
25+
One basic operation is string concatenation\pythonIdx{str!concatenation}\pythonIdx{str!+}\pythonIdx{+}:
26+
\pythonil{"Hello" + ' ' + "World"}\pythonIdx{\textquotedbl}\pythonIdx{\textquotesingle} concatenates the three strings \pythonil{"Hello"}, \pythonil{" "}, and \pythonil{"World"}.
27+
The result is \pythonil{"Hello World"}\pythonIdx{\textquotedbl}.
28+
Notice how the single space character string is needed, because \pythonil{"Hello" + "World"} would just yield \pythonil{"HelloWorld"}.
29+
30+
Strings are different from the other datatypes we have seen so far.
31+
They are \emph{sequences}\pythonIdx{Sequence}, meaning that they are linear arrays composed of elements.
32+
These elements are the single characters, which correspond to letters, numbers, punctuation marks, white space, etc.
33+
34+
One basic set of things that we can do with strings is to extract these single characters.
35+
First, we need to know the length of a string.
36+
For this purpose, we can invoke the \pythonilIdx{len}\pythonIdx{str!len}\pythonIdx{str!length} function:
37+
\pythonil{len("Hello")} is \pythonil{5}, because there are five characters in \inQuotes{Hello}.
38+
\pythonil{len("Hello World!")} would give us \pythonil{12}, because \pythonil{"Hello"} has five characters, \pythonil{"World!"} has six characters (the \pythonil{"!"} does count!) and there is the single space character in the middle, so $5+6+1=12$.
39+
40+
Knowing the length\pythonIdx{str!length} of a string, we can now safely access its single characters.
41+
These characters are obtained using the square brackets \pythonil{[]}\pythonIdx{str![]}\pythonIdx{[}\pythonIdx{]} with the character index in between.
42+
The character indexes start at~0.
43+
Therefore, \pythonil{"Hello"[0]}\pythonIdx{str![]}\pythonIdx{[}\pythonIdx{]} returns the first character of \pythonil{"Hello"} as a \pythonilIdx{str}, which is \pythonil{"H"}\pythonIdx{\textquotedbl}.
44+
\pythonil{"Hello"[1]} returns the second character, which is \pythonil{"e"}.
45+
\pythonil{"Hello"[2]} returns the third character, which is \pythonil{"l"}.
46+
\pythonil{"Hello"[3]}\pythonIdx{str![]}\pythonIdx{[}\pythonIdx{]} gives us the second \pythonil{"l"}.
47+
Finally, \pythonil{"Hello"[4]} gives us the fifth and last character, namely \pythonil{"o"}\pythonIdx{\textquotedbl}.
48+
If we try to access a character outside of the valid range of the string, say \pythonil{"Hello"[5]}, this results in an \pythonilIdx{IndexError}.
49+
We learn later what errors are and how to handle them -- for now, it is sufficient to know that they will stop your program.
50+
And rightly so, because \pythonil{"Hello"}\pythonIdx{\textquotedbl} has only five characters and accessing the sixth one is not possible and would have an undefined result.
51+
52+
Negative indices, however, are permitted:
53+
The index \pythonil{-1} just means \inQuotes{last character}, so \pythonil{"Hello"[-1]} yields the string \pythonil{"o"}.
54+
The index \pythonil{-2} then refers to the \inQuotes{second-to-last character}, so \pythonil{"Hello"[-2]} gives us \pythonil{"l"}.
55+
The third character from the end, accessed via index \pythonil{-3}, is again \pythonil{"l"}.
56+
\pythonil{"Hello"[-4]} gives us \pythonil{"e"} and \pythonil{"Hello"[-5]} gives us \pythonil{"H"}.
57+
Of course, using a negative index that would bring us out of the string's valid range, such as \pythonil{-6}, again yields an \pythonilIdx{IndexError}.
58+
59+
We can also obtain whole substrings by using index ranges, where the inclusive starting index and the \emph{exclusive} end index are separated by a~\pythonilIdx{:}.
60+
In other words, applying the index \pythonil{[a:b]} to a string results in all characters in the index range from \pythonil{a} to \pythonil{b - 1}.
61+
\pythonil{"Hello"[0:3]} yields a string composed of the characters at positions~0, 1, and~2 inside \pythonil{"Hello"}, i.e., \pythonil{"Hel"}.
62+
The end index is always excluded, so the character at index~3 is not part of the result.
63+
If we do \pythonil{"Hello"[1:3]}, we get \pythonil{"el"}, because only the characters at indices~1 and~2 are included.
64+
If we do not specify an end index, then everything starting at the start index until the end of the string is included.
65+
This means that \pythonil{"Hello"[2:]} will return all the text starting at index~2, which is \pythonil{"llo"}.
66+
We can also use negative indices, if we want.
67+
Therefore, \pythonil{"Hello"[1:-2]} yields \pythonil{"el"}.
68+
Finally, we can also omit the start index, in which case everything until right before the end index is returned.
69+
Therefore, \pythonil{"Hello"[:-2]} will return everything from the beginning of the string until right before the second-to-last character.
70+
This gives us \pythonil{"Hel"}.
71+
72+
\begin{figure}%
73+
\centering%
74+
\includegraphics[width=0.8\linewidth]{\currentDir/strBasicOps}%
75+
\caption{Some more basic string operations.}%
76+
\label{fig:strBasicOps}%
77+
\end{figure}%
78+
79+
Besides concatenating and extracting substrings, the \pythonilIdx{str} datatype supports many other operations.
80+
Here, we can just discuss the few most commonly used ones.
81+
82+
There are several ways to check whether one string is contained in another one.
83+
The first method is to use the \pythonilIdx{in} keyword.
84+
As \cref{fig:strBasicOps} shows, \pythonil{"World" in "Hello World!"} yields \pythonilIdx{True}, as it checks whether \pythonil{"World"} is contained in \pythonil{"Hello World!"}, which is indeed the case.
85+
\pythonil{"Earth" in "Hello World!"} is \pythonilIdx{False}, because \pythonil{"Earth"} is not contained in \pythonil{"Hello World!"}.
86+
87+
Often, however, we do not just want to know whether a string is contained in another one, but also \emph{where} it is contained.
88+
For this, the \pythonilIdx{find} method exists.
89+
\pythonil{"Hello World!".find("World")} tries to find the position of \pythonil{"World"} inside \pythonil{"Hello World!"}.
90+
It returns \pythonil{6}, because the \inQuotes{W} of \inQuotes{World} is the seventh character in this string and the indices are zero-based.
91+
Trying to find the \pythonil{"world"} in \pythonil{"Hello World!"} yields~\pythonil{-1}, however.
92+
\pythonil{-1} means that the string cannot be found.
93+
We learn that string operations are case-sensitive\pythonIdx{str!case-sensitive}:
94+
\pythonil{"World" != "world"} would be \pythonilIdx{True}.
95+
We also learn that we need to be careful not to use the result of \pythonilIdx{find} as index in a string directly before checking that it is \pythonil{>= 0}!
96+
As you have learned, \pythonil{-1} is a perfectly fine index into a string, even though it means that the string we tried to find was not found.
97+
98+
Sometimes, the text we are looking for is contained multiple times in a given string.
99+
For example, \pythonil{"Hello World!".find("l")} returns~\pythonil{2}, because \inQuotes{l} is the third character in the string.
100+
However, it is also the fourth character in the string.
101+
\pythonilIdx{find} accepts an optional second parameter, namely the starting index where the search should begin.
102+
\pythonil{"Hello World!".find("l", 3)} begins to search for \pythonil{"l"} inside \pythonil{"Hello World!"} starting at index~3.
103+
Right at that index, the second~\inQuotes{l} is found, so that \pythonil{3} is also returned.
104+
If we search for another~\inQuotes{l} after that, we would do \pythonil{"Hello World!".find("l", 4)}, which returns index~9, identifying the~\inQuotes{l} in~\inQuotes{World}.
105+
After that, no more~\inQuotes{l} can be found in the string, so \pythonil{"Hello World!".find("l", 10)} results in a~\pythonil{-1}.%
106+
%
107+
\begin{sloppypar}%
108+
While \pythonilIdx{find} returns the first occurrence of a string in the supplied range, we sometimes want the last occurrence instead.
109+
If we want to search from the end of the string, we use \pythonilIdx{rfind}.
110+
\pythonil{"Hello World!".rfind("l")} gives us~\pythonil{9} directly.
111+
If we want to search for the~\inQuotes{l} before that one, we need to supply an inclusive starting and exclusive ending index of the range to be searched.
112+
\pythonil{"Hello World!".rfind("l", 0, 9)} searches for any~\inQuotes{l} from index~8 down to~0 and thus returns~\pythonil{3}.
113+
\pythonil{"Hello World!".rfind("l", 0, 3)} gives us~\pythonil{2} and since there is no~\inQuotes{l} before that, \pythonil{"Hello World!".rfind("l", 0, 2)} yields~\pythonil{-1}.
114+
\end{sloppypar}%
115+
%
116+
\begin{sloppypar}%
117+
Another common operation is to replace substrings with something else.
118+
\pythonil{"Hello World!".replace("Hello", "Hi")}\pythonIdx{replace} replaces all occurrences of \inQuotes{Hello} in \inQuotes{Hello World!} with \inQuotes{Hi}.
119+
The result is \pythonil{"Hi World!"} and \pythonil{"Hello Hello World!".replace("Hello", "Hi")} becomes \pythonil{"Hi Hi World!"}.
120+
\end{sloppypar}%
121+
%
122+
\begin{sloppypar}%
123+
Often, we want to remove all leading or trailing whitespace characters from a string.
124+
The \pythonilIdx{strip} function does this for us:
125+
\pythonil{" Hello World! ".strip()} returns \pythonil{"Hello World!"}, i.e., the same string, but with the leading and trailing space removed.
126+
If we only want to remove the spaces on the left-hand side, we use \pythonilIdx{lstrip} and if we only want to remove those on the right-hand side, we use \pythonilIdx{rstrip} instead.
127+
Therefore, \pythonil{" Hello World! ".lstrip()} yields \pythonil{"Hello World! "} and \pythonil{" Hello World! ".rstrip()} gives us \pythonil{" Hello World!"}.
128+
\end{sloppypar}%
129+
%
130+
In alphabet-based languages, we usually can distinguish between uppercase\pythonIdx{str!uppercase} characters, such as \inQuotes{H} and \inQuotes{W}, and lowercase\pythonIdx{str!lowercase}, such as \inQuotes{e}, \inQuotes{l}, and~\inQuotes{o}.
131+
The method \pythonilIdx{lower} transforms all characters in a string to lowercase and \pythonilIdx{upper} translates them to uppercase instead.
132+
Thus, \pythonil{"Hello World!".lower()} returns \pythonil{"hello world!"} whereas \pythonil{"Hello World!".upper()} yields \pythonil{"HELLO WORLD!"}.
133+
134+
Finally, to check whether a string begins or ends with another one, we can use the methods \pythonilIdx{startswith} and \pythonilIdx{endswith}, respectively.
135+
\pythonil{"Hello World!".startswith("hello")} is \pythonilIdx{False} whereas \pythonil{"Hello World!".startswith("Hello")} is \pythonilIdx{True}.
136+
\pythonil{"Hello World!".endswith("Hello")} is \pythonilIdx{False}, too, but \pythonil{"Hello World!".endswith("World!")} is \pythonilIdx{True}.
137+
138+
Of course, this was just a small selection of the many string operations available in \python.
139+
You can find more in the \href{https://docs.python.org/3/library/stdtypes.html\#textseq}{official documentation}~\cite{PSF2024TSTS}.%
140+
\endhsection%
141+
%
142+
\endhsection%
143+
%
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)