From 0ff660598a4ebafa21581f3485222d4ffb6c5399 Mon Sep 17 00:00:00 2001 From: Kate W Date: Tue, 12 Nov 2019 13:03:20 -0600 Subject: [PATCH] added gitignore & tidyr --- .gitignore | 7 +++++++ Collections-OCR.Rproj | 13 +++++++++++++ ocrMangle.R | 3 ++- 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 .gitignore create mode 100644 Collections-OCR.Rproj diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6c0da7c --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata + +*.jpg +*.csv \ No newline at end of file diff --git a/Collections-OCR.Rproj b/Collections-OCR.Rproj new file mode 100644 index 0000000..8e3c2eb --- /dev/null +++ b/Collections-OCR.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX diff --git a/ocrMangle.R b/ocrMangle.R index 07097db..c62e106 100644 --- a/ocrMangle.R +++ b/ocrMangle.R @@ -3,7 +3,7 @@ # (c) 2019 The Field Museum - MIT License (https://opensource.org/licenses/MIT) # https://github.com/fieldmuseum/Collections-OCR - +library(tidyr) library(magick) library(stringr) library(tesseract) @@ -53,6 +53,7 @@ ocrText <- separate(imagesOCR, text, sep = "\n", extra = "merge", fill = "right") + # export CSV write.csv(ocrText, paste0("ocrText-",