merge

Simple tool to quickly merge datasets for statistical analysis
git clone git://git.wrycode.com/wrycode/archive/merge.git
Log | Files | Refs | README | LICENSE

import.go (3481B)


      1 package main
      2 
      3 import (
      4 	"github.com/360EntSecGroup-Skylar/excelize"
      5 	"log"
      6 	"strings"
      7 	"path/filepath"
      8 	"os"
      9 	"encoding/csv"
     10 	"fmt"
     11 	"runtime"
     12 )
     13 
     14 // Dataset holds all of the variables and their data
     15 type Dataset struct {
     16 	data   map[string][]string // maps terms to ordered data (columns)
     17 	terms  []string            // ordered list of terms
     18 	name   string              // name of the dataset
     19 	height int                 // height of the XLSX file (including the first row of terms)
     20 }
     21 
     22 func ImportDataset(name string, rows [][]string) *Dataset {
     23 	// Initialize Dataset
     24 	var d Dataset
     25 	d.data = make(map[string][]string)
     26 
     27 	d.height = len(rows)
     28 
     29 	d.terms = rows[0]
     30 	d.name = name
     31 
     32 	// Create blank column for each term
     33 	for _, term := range d.terms {
     34 		d.data[term] = make([]string, d.height-1) // specimens start on the second row
     35 	}
     36 
     37 	// Fill in columns
     38 	for j, row := range rows[1:d.height] {
     39 		for i, value := range row {
     40 			d.data[d.terms[i]][j] = value
     41 		}
     42 	}
     43 	//	d.removeUnusedTerms()
     44 	return &d
     45 }
     46 
     47 // removeUnusedTerms clears unused terms from the term list
     48 func (d *Dataset) removeUnusedTerms() {
     49 	// find unused terms
     50 	var termsToRemove []string
     51 	for _, term := range d.terms {
     52 		if !notAllSame(d.data[term]) {
     53 			termsToRemove = append(termsToRemove, term)
     54 		}
     55 	}
     56 
     57 	// clear them
     58 	for _, term := range termsToRemove {
     59 		if Include(d.terms, term) {
     60 			d.terms = Remove(d.terms, term)
     61 		}
     62 	}
     63 }
     64 
     65 // func ImportDatasets(paths []string) map[string]*Dataset {
     66 // 	datasets := make(map[string]*Dataset)
     67 
     68 // 	for _, file := range paths {
     69 // 		// fmt.Println(file)
     70 // 		datasets[file] = ImportXLSX(file)
     71 // 	}
     72 // 	return datasets
     73 // }
     74 
     75 // chooses which export function to call based on file extension
     76 func exportDataset(d *Dataset, filename string) {
     77 	ext := strings.ToLower(filepath.Ext(filename))
     78 	fmt.Println(ext)
     79 	switch  ext {
     80 	case ".csv":
     81 		exportDatasetToCsv(d, filename)
     82 		break;
     83 	default:
     84 		exportDatasetToExcel(d, filename)
     85 	}
     86 }
     87 
     88 func exportDatasetToExcel(d *Dataset, filepath string) {
     89 	f := excelize.NewFile()
     90 	sheetName := f.GetSheetMap()[1]
     91 
     92 	for i, name := range d.terms {
     93 		// write the term
     94 		coord, err := excelize.CoordinatesToCellName(i+1,1 )
     95 		if err != nil {
     96 			log.Fatal("Trouble getting coaordinates:", err)
     97 		}
     98 		f.SetCellStr(sheetName,coord, name)
     99 
    100 		// write the rest of the column
    101 		for j, value := range d.data[name] {
    102 			coord, err := excelize.CoordinatesToCellName(i+1, j+2)
    103 			if err != nil {
    104 				log.Fatal("Trouble getting coaordinates:", err)
    105 			}
    106 			f.SetCellStr(sheetName, coord, value)
    107 		}
    108 	}
    109 
    110 	err := f.SaveAs(filepath)
    111 	if err != nil {
    112 		log.Fatal("Unable to save file", err)
    113 	}
    114 
    115 }
    116 
    117 func exportDatasetToCsv(d *Dataset, filepath string) {
    118 
    119 	err := os.RemoveAll(filepath)
    120 	if err != nil {
    121 		log.Fatal(err)
    122 	}
    123 
    124 	f, err := os.Create(filepath)
    125 	if err != nil {
    126 		log.Fatal("Cannot open '%s': %s\n", filepath, err.Error())
    127 		os.Exit(1)
    128 	}
    129 	defer f.Close()
    130 
    131 	w := csv.NewWriter(f)
    132 	// fix windows line endings
    133 	if runtime.GOOS == "windows" {
    134 		w.UseCRLF = true
    135 	}
    136 	w.Write(d.terms)                            // first line contains the terms in order
    137 	for i := range d.data[d.terms[0]] { // use the length of the first column as the number of rows
    138 		var row []string
    139 		for _, value := range d.terms { // for each term
    140 			row = append(row, d.data[value][i]) // add the value of the term for the current row
    141 		}
    142 
    143 		if err := w.Write(row); err != nil { // write the row
    144 			log.Fatal("error writing record to csv:", err)
    145 		}
    146 	}
    147 	w.Flush()
    148 	f.Sync()
    149 
    150 }