doc nit

yuming-long · yuming-long · commit 2836e2db7f66 · 2023-06-09T11:39:24.000-04:00
diff --git a/unstructured/partition/auto.py b/unstructured/partition/auto.py
@@ -70,8 +70,8 @@ def partition(
     include_page_breaks
         If True, the output will include page breaks if the filetype supports it
     strategy
-        The strategy to use for partitioning the PDF. Uses a layout detection model if set
-        to 'hi_res', otherwise partition_pdf simply extracts the text from the document
+        The strategy to use for partitioning a PDF or image. Uses a layout detection model if set
+        to 'hi_res', otherwise partition simply extracts the text from the document
         and processes it.
     encoding
         The encoding method used to decode the text input. If None, utf-8 will be used.
diff --git a/unstructured/partition/image.py b/unstructured/partition/image.py
@@ -35,10 +35,12 @@ def partition_image(
         The languages to use for the Tesseract agent. To use a language, you'll first need
         to install the appropriate Tesseract language pack.
     strategy
-        The strategy to use for partitioning the PDF. Valid strategies are "hi_res" and
+        The strategy to use for partitioning the image. Valid strategies are "hi_res" and
         "ocr_only". When using the "hi_res" strategy, the function uses a layout detection
         model if to identify document elements. When using the "ocr_only" strategy,
         partition_image simply extracts the text from the document using OCR and processes it.
+        The default strategy `auto` will determine when an image can be extracted using `ocr_only` mode,
+        otherwise it will fall back to `hi_res`.
     """
     exactly_one(filename=filename, file=file)
 
diff --git a/unstructured/partition/pdf.py b/unstructured/partition/pdf.py
@@ -57,9 +57,10 @@ def partition_pdf(
         The strategy to use for partitioning the PDF. Valid strategies are "hi_res",
         "ocr_only", and "fast". When using the "hi_res" strategy, the function uses
         a layout detection model to identify document elements. When using the
-        "ocr_only" strategy, partition_image simply extracts the text from the
+        "ocr_only" strategy, partition_pdf simply extracts the text from the
         document using OCR and processes it. If the "fast" strategy is used, the text
-        is extracted directly from the PDF.
+        is extracted directly from the PDF. The default strategy `auto` will determine
+        when a page can be extracted using `fast` mode, otherwise it will fall back to `hi_res`.
     infer_table_structure
         Only applicable if `strategy=hi_res`.
         If True, any Table elements that are extracted will also have a metadata field