commit 151b56db47f3dfaf48024eb60c93e61c55d5fcf8
Author: Nick Econopouly <wry@mm.st>
Date: Tue, 21 Jan 2020 16:37:46 -0500
Initial commit.
Diffstat:
A | arrayutils.go | | | 42 | ++++++++++++++++++++++++++++++++++++++++++ |
A | import.go | | | 118 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | main.go | | | 74 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | winbuild.sh | | | 2 | ++ |
4 files changed, 236 insertions(+), 0 deletions(-)
diff --git a/arrayutils.go b/arrayutils.go
@@ -0,0 +1,42 @@
+package main
+
+// Index returns the first index of the target string t, or -1 if no
+// match is found
+func index(terms []string, s string) int {
+ for i, v := range terms {
+ if v == s {
+ return i
+ }
+ }
+ return -1
+}
+
+// Include returns true if the target string t is in the slice
+func Include(terms []string, term string) bool {
+ return index(terms, term) >= 0
+}
+
+// Remove returns a []string with all instances of term removed, or
+// unchanged if the term isn't in the slice
+func Remove(terms []string, term string) []string {
+ var result []string
+ for _, t := range terms {
+ if t != term {
+ result = append(result, t)
+ }
+ }
+ if result != nil {
+ return result
+ }
+ return []string{}
+}
+
+// notAllSame returns true if not every element of a string slice is the same
+func notAllSame(s []string) bool {
+ for i := 0; i < len(s); i++ {
+ if s[i] != s[0] {
+ return true
+ }
+ }
+ return false
+}
diff --git a/import.go b/import.go
@@ -0,0 +1,118 @@
+package main
+
+import (
+ "fmt"
+ "github.com/360EntSecGroup-Skylar/excelize"
+ "log"
+)
+
+// Dataset holds all of the variables and their data
+type Dataset struct {
+ data map[string][]string // maps terms to ordered data (columns)
+ terms []string // ordered list of terms
+ name string // name of the dataset
+ height int // height of the XLSX file (including the first row!)
+}
+
+// ImportXLSX imports an XLSX file. It takes filename, sheet name,
+// name for the Dataset internally, and height in rows as arguments
+// and returns a *Dataset.
+func ImportXLSX(filename string) *Dataset {
+ // Initialize Dataset
+ var d Dataset
+ d.data = make(map[string][]string)
+ d.name = filename
+ // d.height = height
+
+ // pull data from file
+ f, err := excelize.OpenFile(filename)
+ if err != nil {
+ fmt.Println("Problem importing ", filename, err)
+ return &d
+ }
+
+ sheetName := f.GetSheetMap()[1]
+ fmt.Println(sheetName)
+
+ rows, err := f.GetRows(sheetName)
+
+ if err != nil {
+ fmt.Println("Cannot read XLSX data in", filename, err.Error())
+ return &d
+ }
+ d.height = len(rows)
+
+ d.terms = rows[0]
+
+ // Create blank column for each term
+ for _, term := range d.terms {
+ d.data[term] = make([]string, d.height-1) // specimens start on the second row
+ }
+
+ // Fill in columns
+ for j, row := range rows[1:d.height] {
+ for i, value := range row {
+ d.data[d.terms[i]][j] = value
+ }
+ }
+ // d.removeUnusedTerms()
+ return &d
+}
+
+// removeUnusedTerms clears unused terms from the term list
+func (d *Dataset) removeUnusedTerms() {
+ // find unused terms
+ var termsToRemove []string
+ for _, term := range d.terms {
+ if !notAllSame(d.data[term]) {
+ termsToRemove = append(termsToRemove, term)
+ }
+ }
+
+ // clear them
+ for _, term := range termsToRemove {
+ if Include(d.terms, term) {
+ d.terms = Remove(d.terms, term)
+ }
+ }
+}
+
+func ImportDatasets(paths []string) map[string]*Dataset {
+ datasets := make(map[string]*Dataset)
+
+ for _, file := range paths {
+ // fmt.Println(file)
+ datasets[file] = ImportXLSX(file)
+ }
+ return datasets
+}
+
+func exportDataset(d *Dataset, name string) {
+ f := excelize.NewFile()
+ sheetName := f.GetSheetMap()[1]
+
+ for i, name := range d.terms {
+ // write the term
+ coord, err := excelize.CoordinatesToCellName(i+1,1 )
+ if err != nil {
+ log.Fatal("Trouble getting coaordinates:", err)
+ }
+ f.SetCellStr(sheetName,coord, name)
+
+ // write the rest of the column
+ for j, value := range d.data[name] {
+ coord, err := excelize.CoordinatesToCellName(i+1, j+2)
+ if err != nil {
+ log.Fatal("Trouble getting coaordinates:", err)
+ }
+ f.SetCellStr(sheetName, coord, value)
+ }
+ }
+
+ err := f.SaveAs("merged.xlsx")
+ if err != nil {
+ log.Fatal("Unable to save file", err)
+ }
+
+}
+
diff --git a/main.go b/main.go
@@ -0,0 +1,74 @@
+package main
+
+import (
+ "fmt"
+ "os"
+ "log"
+ "path/filepath"
+)
+
+func getExcelFiles() []string {
+ var files []string
+
+ path, err := os.Getwd()
+ if err != nil {
+ log.Println(err)
+ }
+
+ fmt.Println("Attempting to merge Microsoft Excel files in ", path)
+
+ err = filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
+ if info.IsDir() {
+ return nil
+ }
+ if filepath.Base(path) == "merged.xlsx" { // ignore our output file
+ return nil
+ }
+ if filepath.Ext(path) == ".xlsx" || filepath.Ext(path) == ".XLSX" {
+ files = append(files, path)
+ }
+ return nil
+ })
+ if err != nil {
+ panic(err)
+ }
+ return files
+}
+
+func mergeDatasets(base *Dataset, new *Dataset) {
+ numOld := base.height - 1
+ numNew := new.height - 1 // number of specimens being added
+
+ for _, term := range new.terms {
+ if !Include(base.terms, term) {
+ base.terms = append(base.terms, term)
+ base.data[term] = make([]string, numOld)
+ }
+
+ }
+
+ for _, term := range base.terms {
+ base.data[term] = append(base.data[term], new.data[term]...)
+ }
+
+ base.height += numNew
+}
+
+func main() {
+ // get list of excel files
+ files := getExcelFiles()
+
+ // import them all into Datasets
+ datasets := ImportDatasets(files)
+
+ // merge them into a single Dataset
+ var dataset Dataset
+ dataset.height = 1 // row of terms, even though it's empty
+ dataset.data = make(map[string][]string)
+ for _, d := range datasets {
+ mergeDatasets(&dataset,d)
+ }
+
+ // export dataset
+ exportDataset(&dataset, "merged.xlsx")
+}
diff --git a/winbuild.sh b/winbuild.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+GOOS=windows GOARCH=amd64 go build