merge

Simple tool to quickly merge datasets for statistical analysis
git clone git://git.wrycode.com/wrycode/archive/merge.git
Log | Files | Refs | README | LICENSE

commit 7d8f8098c4feaad498ca7f325a8d43e33b0060c9
parent 65f98f26d64a261d4a5e0dc71ea3c343f81d1e2d
Author: Nick Econopouly <wry@mm.st>
Date:   Sun, 22 Mar 2020 17:13:42 -0400

Comment main.go and update file extension checking

Diffstat:
M import.go | 1 -
M main.go | 209 +++++++++++++++++++++++++++++++++++++++++++++----------------------------------
2 files changed, 118 insertions(+), 92 deletions(-)

diff --git a/import.go b/import.go @@ -15,7 +15,6 @@ type Dataset struct { } func ImportDataset(name string, rows [][]string) *Dataset { - fmt.Println("name is ", name) // Initialize Dataset var d Dataset d.data = make(map[string][]string) diff --git a/main.go b/main.go @@ -16,8 +16,6 @@ import ( "github.com/gotk3/gotk3/gdk" ) -const EXPORT_BASE_FILENAME = "merged" - func windowSetup() *gtk.Window { os.Setenv("GSETTINGS_SCHEMA_DIR", ".\\share\\glib-2.0\\schemas") @@ -51,37 +49,79 @@ func windowSetup() *gtk.Window { } func rebuildDatasetListBox(list *gtk.ListBox, filenames *[]string, window *gtk.Window) { - // clear list list.GetChildren().Foreach(func(item interface{}) { list.Remove(item.(*gtk.Widget)) }) for _, filename := range *filenames { - poop := filename + // local variable necessary; see https://github.com/golang/go/wiki/CommonMistakes#using-goroutines-on-loop-iterator-variables + path := filename + + // box to hold the two elements box, err := gtk.BoxNew(gtk.ORIENTATION_HORIZONTAL, 2) + + // show the filename text := newLabel(filename) text.SetSelectable(true) box.PackStart(text, false, false, 0) + // add the "Remove" button removeButton, err := gtk.ButtonNewWithLabel("Remove") if err != nil { log.Fatal("Unable to create removeButton", err) } - + box.PackEnd(removeButton,false, false, 0) trashImage, err := gtk.ImageNewFromFile("trash.png") removeButton.SetImage(trashImage) - + + // "Remove button" removes the path from the list of filepaths and runs this function again _, err = removeButton.Connect("clicked", func() { - *filenames = Remove(*filenames, poop) + *filenames = Remove(*filenames, path) rebuildDatasetListBox(list, filenames, window) window.ShowAll() }) - - box.PackEnd(removeButton,false, false, 0) + + // add the two elements to the overall list list.Insert(box, 0) } } +func newHeadline(s string) *gtk.Label { + if len(s) < 115 { + var b bytes.Buffer + + for i := len(s); i < 115; i++ { + b.WriteString(" ") + } + s += b.String() + } + l := 
newLabel(s) + style, err := l.GetStyleContext() + if err != nil { + log.Fatal("error getting style context") + } + style.AddClass("section-title") + return l +} +func newLabel(s string) *gtk.Label { + l, err := gtk.LabelNew(s) + if err != nil { + log.Fatal("Unable to create label",s,":", err) + } + return l +} + +// very dumb file extension checking +func goodFileExtensions(filepaths *[]string) bool { + for _, path := range *filepaths { + ext := strings.ToLower(filepath.Ext(path)) + if ext != ".csv" && ext != ".xlsx" && ext != ".ods" { + return false + } + } + return true +} + func pullExcel(path string) [][]string { var rows [][]string @@ -146,6 +186,7 @@ func pullODS(path string) [][]string { } +// see import.go for Dataset fields and methods used in this file func mergeDatasets(base *Dataset, new *Dataset) { numOld := base.height - 1 numNew := new.height - 1 // number of specimens being added @@ -155,18 +196,16 @@ func mergeDatasets(base *Dataset, new *Dataset) { base.terms = append(base.terms, "DATASET") } - // add DATASET term + // add DATASET term to show which file the specimen comes from new.terms = append(new.terms, "DATASET") new.data["DATASET"] = make([]string, numNew) for i, _ := range new.data["DATASET"] { new.data["DATASET"][i] = new.name } - // fmt.Println(new.name) // add blank data for new terms not present in the old dataset for _, term := range new.terms { if !Include(base.terms, term) { - fmt.Println("base.terms is ", base.terms, " and term is", term) base.terms = append(base.terms, term) base.data[term] = make([]string, numOld) } @@ -175,25 +214,24 @@ func mergeDatasets(base *Dataset, new *Dataset) { // add blank data for old terms not present in the new dataset for _, term := range base.terms { if !Include(new.terms, term) { - fmt.Println("new.terms is ", new.terms, " and term is", term) new.terms = append(new.terms, term) new.data[term] = make([]string, numNew) } } - // datasets are equalized (no new data, just blank terms where - // 
appropriate) so merging them is a simple append + // datasets are equalized now (no new data, just blank terms + // where appropriate) so merging them is a simple append for _, term := range base.terms { base.data[term] = append(base.data[term], new.data[term]...) } + // update height base.height += numNew } func main() { window := windowSetup() - // main box container mainBox, err := gtk.BoxNew(gtk.ORIENTATION_VERTICAL, 5) @@ -203,99 +241,114 @@ func main() { mainBox.PackStart(newHeadline("Add datasets:"), false, false, 0) + // holds the paths to the datasets to be merged; this var is + // passed around when removing and adding datasets + filepaths := &[]string{} + + // listBox of dataset names + remove buttons datasetListBox, err := gtk.ListBoxNew() - // datasetListBox.Insert(newLabel("testing datasetList"), 0) mainBox.PackStart(datasetListBox,false, false, 0) - // holds the dataset filenames - selections := &[]string{} - // refresh the (empty) list of datasets for the first time - rebuildDatasetListBox(datasetListBox, selections, window) + rebuildDatasetListBox(datasetListBox, filepaths, window) + // add new datasets using native file chooser addButton, err := gtk.ButtonNewWithLabel("Add Dataset(s)") if err != nil { log.Fatal("unable to create addButton") } + mainBox.PackStart(addButton, false, false, 0) // native file chooser addDatasetDialog, err := gtk.FileChooserNativeDialogNew("open",window,gtk.FILE_CHOOSER_ACTION_OPEN,"open","cancel") - + // user can add multiple datasets addDatasetDialog.SetSelectMultiple(true) _, err = addButton.Connect("clicked", func() { _ = addDatasetDialog.Run() - // runButton.SetSensitive(true) - // fmt.Println(saveDialog.GetFilename()) list, err := addDatasetDialog.GetFilenames() if err != nil { log.Fatal("Error getting filenames") } + // list is a *glib.SList returned by GetFilenames. + // glib.SList.Foreach iterates over items in a list + // and provides unsafe.Pointers to the C data. 
Here we + // can convert the C []chars to Golang strings using + // cgo list.Foreach(func(ptr unsafe.Pointer) { filename := C.GoString((*C.char)(ptr)) - fmt.Println(filename) - *selections = append(*selections, filename) + // add path to list + *filepaths = append(*filepaths, filename) }) - rebuildDatasetListBox(datasetListBox, selections, window) + // refresh the view + rebuildDatasetListBox(datasetListBox, filepaths, window) window.ShowAll() }) + // merge datasets mergeButton, err := gtk.ButtonNewWithLabel("Merge Datasets") if err != nil { log.Fatal("Unable to create mergeButton", err) } + mainBox.PackStart(mergeButton, false, false, 0) - mainBox.PackStart(addButton, false, false, 0) - + errorLabel := newLabel("") + mainBox.PackStart(errorLabel, false, false, 0) + + // another native file chooser saveDialog, err := gtk.FileChooserNativeDialogNew("save",window,gtk.FILE_CHOOSER_ACTION_SAVE,"save","cancel") - mainBox.PackStart(mergeButton, false, false, 0) _, err = mergeButton.Connect("clicked", func() { - if checkFiles(selections) { + // clear old error + errorLabel.SetText("") + + if goodFileExtensions(filepaths) { _ = saveDialog.Run() outputFile := saveDialog.GetFilename() + if outputFile != ""{ - // supported file extensions and their associated function for pulling the raw data - fileFormat := map[string]func(string) [][]string{ - ".xlsx": pullExcel, - ".csv": pullCSV, - ".ods": pullODS, - } - - - // map of basenames to the [][]string data - raws := make(map[string][][]string) - - datasets := make(map[string]*Dataset) - - for _, path := range *selections { - basename := strings.TrimSuffix(filepath.Base(path), filepath.Ext(path)) - ext := strings.ToLower(filepath.Ext(path)) + // supported file extensions and their associated function for pulling the raw data + fileFormat := map[string]func(string) [][]string{ + ".xlsx": pullExcel, + ".csv": pullCSV, + ".ods": pullODS, + } - if _, ok := fileFormat[ext]; ok { - raws[basename] = fileFormat[ext](path) + // map of 
basenames to the [][]string data + raws := make(map[string][][]string) + + // all datasets + datasets := make(map[string]*Dataset) + + // import dataset based on file extension + for _, path := range *filepaths { + basename := strings.TrimSuffix(filepath.Base(path), filepath.Ext(path)) + ext := strings.ToLower(filepath.Ext(path)) + if _, ok := fileFormat[ext]; ok { + raws[basename] = fileFormat[ext](path) + } + for name, data := range raws { + datasets[name] = ImportDataset(name, data) + } } - for name, data := range raws { - datasets[name] = ImportDataset(name, data) + + // merge them into a single Dataset + var dataset Dataset + dataset.height = 1 // row of terms, even though it's empty + dataset.data = make(map[string][]string) + for _, d := range datasets { + mergeDatasets(&dataset,d) } - - } - // merge them into a single Dataset - var dataset Dataset - dataset.height = 1 // row of terms, even though it's empty - dataset.data = make(map[string][]string) - for _, d := range datasets { - mergeDatasets(&dataset,d) + + // export dataset + exportDataset(&dataset, outputFile) } - // export dataset - fmt.Println(outputFile) - exportDataset(&dataset, outputFile) - + } else { + errorLabel.SetText("There was a problem with the files you chose. Make sure to choose supported spreadsheet formats.") } - }) window.Add(mainBox) @@ -304,30 +357,4 @@ func main() { gtk.Main() } -func checkFiles(selections *[]string) bool { - return true -} -func newHeadline(s string) *gtk.Label { - if len(s) < 115 { - var b bytes.Buffer - - for i := len(s); i < 115; i++ { - b.WriteString(" ") - } - s += b.String() - } - l := newLabel(s) - style, err := l.GetStyleContext() - if err != nil { - log.Fatal("error getting style context") - } - style.AddClass("section-title") - return l -} -func newLabel(s string) *gtk.Label { - l, err := gtk.LabelNew(s) - if err != nil { - log.Fatal("Unable to create label",s,":", err) - } - return l -} +