Adding SPARQL data querying #57

Open · wants to merge 4 commits into base: dev
1 change: 1 addition & 0 deletions README.md
@@ -14,6 +14,7 @@ Package providing simple Python access to data in:
 * AWS s3
 * MySQL
 * neo4j
+* SPARQL
 
 Sroka library was checked to work for Python **>=3.8, <=3.11**.

100 changes: 62 additions & 38 deletions Test APIs.ipynb
@@ -18,9 +18,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# GA API\n",
"from sroka.api.ga.ga import ga_request\n",
@@ -43,10 +41,36 @@
"from sroka.api.s3_connection.s3_connection_api import s3_download_data, s3_upload_data\n",
"# MySQL API\n",
"from sroka.api.mysql.mysql import query_mysql\n",
"# SPARQL API\n",
"from sroka.api.sparql.sparql import query_sparql\n",
"\n",
"# data wrangling\n",
"import numpy as np"
]
],
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"cell_type": "markdown",
"source": "# SPARQL"
},
{
"metadata": {},
"cell_type": "code",
"source": [
"df = query_sparql(\"\"\"SELECT \n",
"\t?game \n",
"\t?gameLabel \n",
"\t?gameTitle\n",
"WHERE {\n",
"\t?game wdt:P31 wd:Q7889 . # instance of video game\n",
"}\n",
"LIMIT 10\"\"\", endpoint_url='https://query.wikidata.org/sparql')\n",
"df"
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
@@ -57,16 +81,16 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = query_athena(\"\"\"\n",
" SELECT '2019-03-01' as date\n",
" \"\"\")\n",
"\n",
"df"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
@@ -77,38 +101,38 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# input a path to data on your s3, it is needed to perform any query\n",
"s3_folder = ''\n",
"\n",
"s3_download_data('s3://{}'.format(s3_folder), prefix=True, sep=';')"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# input bucket name and file path on your s3, it is needed to perform any query\n",
"s3_bucket = ''\n",
"s3_file_path = ''\n",
"\n",
"# create a test array\n",
"arr = np.array(([1,2,3,4], [4,3,2,1]))"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"s3_upload_data(arr, bucket=s3_bucket, path=s3_file_path)"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
@@ -119,9 +143,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"start_day = '01'\n",
"end_day='04'\n",
@@ -142,7 +164,9 @@
"\n",
"df_gam = get_data_from_admanager(query, dimensions, columns, start_date, stop_date)\n",
"df_gam.head()"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
@@ -153,9 +177,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# your account id, it is needed to perform any query\n",
"your_id = ''\n",
@@ -172,7 +194,9 @@
"\n",
"df_ga = ga_request(request, print_sample_size=True, sampling_level='FASTER')\n",
"df_ga.head()"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
@@ -183,14 +207,14 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"new_sheet = google_drive_sheets_create('new_sheet')\n",
"\n",
"google_drive_sheets_write(df, new_sheet)"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
@@ -201,9 +225,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"input_data_moat = {\n",
" 'start' : '20190301',\n",
@@ -213,7 +235,9 @@
"\n",
"df_moat = get_data_from_moat(input_data_moat, 'moat')\n",
"df_moat.head()"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
@@ -224,17 +248,17 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"presto_query = \"\"\"\n",
" SELECT '2019-03-01' as date;\n",
" \"\"\"\n",
"\n",
"data_presto = request_qubole(presto_query, query_type='hive')\n",
"data_presto.head()"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
@@ -245,9 +269,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"input_data = {\n",
" 'start' : '2018-08-23T00:00:00-07:00',\n",
@@ -264,7 +286,9 @@
"\n",
"data = get_data_from_rubicon(input_data)\n",
"data.head()"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "markdown",
@@ -275,20 +299,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = query_mysql(\"SELECT * FROM clan LIMIT 10\")\n",
"df.head()"
]
],
"outputs": [],
"execution_count": null
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"source": [],
"outputs": [],
"source": []
"execution_count": null
}
],
"metadata": {
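The module behind `from sroka.api.sparql.sparql import query_sparql` is not shown in this view. Below is a minimal sketch of what it might contain, assuming it wraps SPARQLWrapper (the dependency pinned in requirements.txt below) and flattens the JSON result bindings into a pandas DataFrame; everything here beyond the `query_sparql` name and its notebook usage is an assumption, not the PR's actual code.

```python
# Hypothetical sketch of sroka/api/sparql/sparql.py -- not the PR's actual body.
import pandas as pd
from SPARQLWrapper import JSON, SPARQLWrapper


def query_sparql(query, endpoint_url):
    """Run a SPARQL query and return the result bindings as a DataFrame."""
    sparql = SPARQLWrapper(endpoint_url)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    # Each binding maps a variable name to {'type': ..., 'value': ...};
    # keep just the values so the DataFrame gets one column per variable.
    rows = [
        {var: cell['value'] for var, cell in binding.items()}
        for binding in results['results']['bindings']
    ]
    return pd.DataFrame(rows)
```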
3 changes: 3 additions & 0 deletions config.sample.ini
@@ -35,3 +35,6 @@ database: DATABASE
 neo4j_username: USERNAME
 neo4j_password: PASSWORD
 neo4j_address: ADDRESS:PORT
+
+[sparql]
+endpoint_url: ENDPOINT_URL
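For reference, a sketch of reading the new section with the standard-library configparser; the config file path is an assumption, and sroka's own config handling may differ:

```python
from configparser import ConfigParser

config = ConfigParser()
config.read('config.ini')  # assumed path to your filled-in sroka config

# e.g. 'https://query.wikidata.org/sparql', as used in the notebook above
endpoint_url = config.get('sparql', 'endpoint_url')
```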
1 change: 1 addition & 0 deletions requirements.txt
@@ -17,4 +17,5 @@ requests>=2.20
 retrying>=1.3.3
 urllib3>=1.26.18
 py2neo>=4.2.0
+SPARQLWrapper>=2.0.0
 db-dtypes
4 changes: 2 additions & 2 deletions setup.py
@@ -8,10 +8,10 @@

 setuptools.setup(
     name="sroka",
-    version="0.0.8",
+    version="0.0.9",
     author="Ad Engineering FANDOM",
     author_email="[email protected]",
-    description="Package for access GA, GAM, MOAT, Qubole, Athena, S3, Rubicon APIs, BigQuery, MySQL",
+    description="Package for access GA, GAM, MOAT, Qubole, Athena, S3, Rubicon APIs, BigQuery, MySQL, SPARQL",
     long_description=long_description,
     long_description_content_type="text/markdown",
     url="https://github.com/Wikia/sroka",
22 changes: 22 additions & 0 deletions sroka/api/helpers.py
@@ -0,0 +1,22 @@
+import os
+from pathlib import Path
+
+
+def save_to_file(df, filename):
+    # Store the path in a cross-platform pathlib object to ensure compatibility
+    # with DOS & UNIX-based operating systems.
+    path = Path(filename)
+
+    # Get the parent directory of the given path.
+    directory_path = str(path.parent.resolve())
+
+    # Create the parent directory if needed. If it already exists, the
+    # `exist_ok` option ensures that no exception will be thrown.
+    if directory_path != "":
+        os.makedirs(directory_path, exist_ok=True)
+
+    # Export the data to a CSV file.
+    try:
+        df.to_csv(filename)
+    except OSError as e:
+        print('Unable to write on filesystem: {}'.format(e))
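A quick usage sketch for the new helper; the DataFrame contents and the target path are illustrative:

```python
import pandas as pd

from sroka.api.helpers import save_to_file

df = pd.DataFrame({'date': ['2019-03-01'], 'value': [1]})

# The parent directory ('reports' here) is created on demand before the
# CSV is written; filesystem errors are printed rather than raised.
save_to_file(df, 'reports/example.csv')
```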
23 changes: 3 additions & 20 deletions sroka/api/mysql/mysql.py
@@ -1,11 +1,10 @@
-import os
 import mysql.connector
 import pandas as pd
 from configparser import NoSectionError
-from pathlib import Path
 from mysql.connector.errors import DatabaseError, OperationalError, InternalError
 from retrying import retry
 from sroka.api.mysql.mysql_helpers import validate_options, get_options_from_config
+from sroka.api.helpers import save_to_file
 
 
 @retry(stop_max_attempt_number=1,
@@ -72,21 +71,5 @@ def query_mysql(query: str, filename=None,
     # Otherwise, store it in a file.
     if not filename:
         return df
-
-    # Store the path in a cross-platform pathlib object to ensure compatibility
-    # with DOS & UNIX-based operating systems.
-    path = Path(filename)
-
-    # Get the parent directory of the given path, if it exists.
-    directory_path = str(path.parent.resolve())
-
-    # If the given path points to a folder, attempt to create it. If it already
-    # exists, the `exist_ok` option ensures that no exception will be thrown.
-    if directory_path != "":
-        os.makedirs(directory_path, exist_ok=True)
-
-    # Export the data in a CSV file.
-    try:
-        df.to_csv(filename)
-    except OSError as e:
-        print('Unable to write on filesystem: {}'.format(e))
+    else:
+        save_to_file(df, filename)
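After the refactor, `query_mysql` keeps its contract: it returns the DataFrame when `filename` is omitted and otherwise delegates the CSV export to the shared helper. A usage sketch, with an illustrative query and path:

```python
from sroka.api.mysql.mysql import query_mysql

# No filename: the result comes back as a DataFrame.
df = query_mysql("SELECT * FROM clan LIMIT 10")

# With a filename: the result is written to CSV via save_to_file.
query_mysql("SELECT * FROM clan LIMIT 10", filename="exports/clan.csv")
```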