Skip to content

Commit

Permalink
Added Custom Outputter that writes the column names
Browse files Browse the repository at this point in the history
Added Custom Outputter that writes the column names and optionally their
types.

Writing the data is left to the reader.
  • Loading branch information
MikeRys committed Jan 2, 2016
1 parent eff7730 commit 9ad8a9d
Show file tree
Hide file tree
Showing 9 changed files with 553 additions and 0 deletions.
63 changes: 63 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
###############################################################################
# Set default behavior to automatically normalize line endings.
###############################################################################
* text=auto

###############################################################################
# Set default behavior for command prompt diff.
#
# This is needed for earlier builds of msysgit that do not have it on by
# default for csharp files.
# Note: This is only used by command line
###############################################################################
#*.cs diff=csharp

###############################################################################
# Set the merge driver for project and solution files
#
# Merging from the command prompt will add diff markers to the files if there
# are conflicts (Merging from VS is not affected by the settings below, in VS
# the diff markers are never inserted). Diff markers may cause the following
# file extensions to fail to load in VS. An alternative would be to treat
# these files as binary and thus will always conflict and require user
# intervention with every merge. To do so, just uncomment the entries below
###############################################################################
#*.sln merge=binary
#*.csproj merge=binary
#*.vbproj merge=binary
#*.vcxproj merge=binary
#*.vcproj merge=binary
#*.dbproj merge=binary
#*.fsproj merge=binary
#*.lsproj merge=binary
#*.wixproj merge=binary
#*.modelproj merge=binary
#*.sqlproj merge=binary
#*.wwaproj merge=binary

###############################################################################
# behavior for image files
#
# image files are treated as binary by default.
###############################################################################
#*.jpg binary
#*.png binary
#*.gif binary

###############################################################################
# diff behavior for common document formats
#
# Convert binary document formats to text before diffing them. This feature
# is only available from the command line. Turn it on by uncommenting the
# entries below.
###############################################################################
#*.doc diff=astextplain
#*.DOC diff=astextplain
#*.docx diff=astextplain
#*.DOCX diff=astextplain
#*.dot diff=astextplain
#*.DOT diff=astextplain
#*.pdf diff=astextplain
#*.PDF diff=astextplain
#*.rtf diff=astextplain
#*.RTF diff=astextplain
28 changes: 28 additions & 0 deletions Examples/HeaderOutputter/HeaderOutputter.sln
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2013
VisualStudioVersion = 12.0.31101.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{182E2583-ECAD-465B-BB50-91101D7C24CE}") = "HeaderOutputterScript", "HeaderOutputterScript\HeaderOutputterScript.usqlproj", "{C8894D8F-1CA5-45BB-9CDA-726A86DEAC9A}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeaderOutputter", "HeaderOutputter\HeaderOutputter.csproj", "{1B3E7106-6D16-4B96-87C5-F15E18FFC08F}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{C8894D8F-1CA5-45BB-9CDA-726A86DEAC9A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{C8894D8F-1CA5-45BB-9CDA-726A86DEAC9A}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C8894D8F-1CA5-45BB-9CDA-726A86DEAC9A}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C8894D8F-1CA5-45BB-9CDA-726A86DEAC9A}.Release|Any CPU.Build.0 = Release|Any CPU
{1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{1B3E7106-6D16-4B96-87C5-F15E18FFC08F}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal
117 changes: 117 additions & 0 deletions Examples/HeaderOutputter/HeaderOutputter/Class1.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
// Copyright 2016 Microsoft Corp.
// Author: Michael Rys (mrys)

using Microsoft.Analytics.Interfaces;
using Microsoft.Analytics.Types.Sql;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

// Sample Outputter that just outputs the rowset schema as a "header". It always writes the names of the columns and provides the option to write the types in a second row.
// It does not output the content of the file (that is left as an exercise to the reader).
//
// At this point, the outputter requires atomic file processing (i.e., the file is not split into parallel extents).
// Once the Output model provides a way to identify the beginning of the file, this restriction can be removed.
//
// USAGE examples:
//
// OUTPUT @res USING new HeaderOutputter.HeaderOutputter(quoting:true, with_types:true, encoding:Encoding.Unicode);
// OUTPUT @res USING HeaderOutputter.Factory.Columns();
// OUTPUT @res USING HeaderOutputter.Factory.ColumnsAndTypes();

namespace HeaderOutputter
{
[SqlUserDefinedOutputter(AtomicFileProcessing = true)]
public class HeaderOutputter : IOutputter
{
private string _row_delim;
private char _col_delim;
private bool _with_types;
private Encoding _encoding;
private bool _quoting;
private bool _first_row_written = false; // Makes sure we only write one header per file

// Parameter initialization
//
// row_delim sets the characters to separate the rows. Default: \r\n.
// col_delim sets the character to separate the columns. Default: ','.
// with_types indicates whether the type row should be included. Default: false.
// quoting indicates whether the column content is quoted with double quotes (and double quotes will be doubled). Default: true
// encoding sets the encoding used to set the file's encoding. Default: UTF8.
//
public HeaderOutputter(string row_delim = "\r\n", char col_delim = ',', bool with_types = false, bool quoting = true, Encoding encoding = null)
{
this._encoding = ((encoding == null) ? Encoding.UTF8 : encoding);
this._row_delim = row_delim;
this._col_delim = col_delim;
this._with_types = with_types;
this._quoting = quoting;
}

// AddQuotes
//
// Quotes the provided string with double quotes and doubles the contained double quotes.
//
public static string AddQuotes(string s)
{
return "\"" + s.Replace("\"", "\"\"") + "\"";
}

// Output
//
// Outputs the names of the rowset columns in a column separated row and optionally adds their types in a second row.
//
public override void Output(IRow row, IUnstructuredWriter output)
{
if (_first_row_written) { return; }
using (StreamWriter streamWriter = new StreamWriter(output.BaseStream, this._encoding))
{
streamWriter.NewLine = this._row_delim;
ISchema schema = row.Schema;
for (int i = 0; i < schema.Count(); i++)
{
var col = schema[i];
if (i > 0)
{
streamWriter.Write(this._col_delim);
}
var val = _quoting ? AddQuotes(col.Name) : col.Name;
streamWriter.Write(val);
}
streamWriter.WriteLine();
if (_with_types)
{
for (int i = 0; i < schema.Count(); i++)
{
var col = schema[i];
if (i > 0)
{
streamWriter.Write(this._col_delim);
}
var val = _quoting ? AddQuotes(col.Type.FullName) : col.Type.FullName;
streamWriter.Write(val);
}
streamWriter.WriteLine();
}
}
_first_row_written = true;
}
}

// Define the factory classes
public static class Factory
{
public static HeaderOutputter Columns(string row_delim = "\r\n", char col_delim = ',', bool quoting = true, Encoding encoding = null)
{
return new HeaderOutputter(row_delim, col_delim, false, quoting, encoding);
}

public static HeaderOutputter ColumnsAndTypes(string row_delim = "\r\n", char col_delim = ',', bool quoting = true, Encoding encoding = null)
{
return new HeaderOutputter(row_delim, col_delim, true, quoting, encoding);
}
}
}
56 changes: 56 additions & 0 deletions Examples/HeaderOutputter/HeaderOutputter/HeaderOutputter.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<SchemaVersion>2.0</SchemaVersion>
<ProjectTypeGuids>{416D63FD-0477-49AA-A954-A7C5B95A9B51};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
<ProjectGuid>{1B3E7106-6D16-4B96-87C5-F15E18FFC08F}</ProjectGuid>
<OutputType>Library</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>HeaderOutputter</RootNamespace>
<AssemblyName>HeaderOutputter</AssemblyName>
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<ItemGroup>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Data" />
<Reference Include="System.Xml" />
<Reference Include="Microsoft.Analytics.Interfaces" />
<Reference Include="Microsoft.Analytics.Types" />
</ItemGroup>
<ItemGroup>
<Compile Include="Class1.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("HeaderOutputter")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("HeaderOutputter")]
[assembly: AssemblyCopyright("Copyright © 2015")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]

// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]

// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("c6f8d3a1-88d8-44c3-b573-37199407f8ea")]

// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Some samples/test cases for the HeaderOutputter
// With code behind

// Create some interesting rowsets
@simple_rs =
SELECT * FROM (VALUES(1, "string", (int?) 2, DateTime.Parse("2015-12-15"))
, (2, "string2", (int?) 3, DateTime.Parse("2015-12-18"))
) AS T(i, str, ni, dt);

@quoted_rs =
SELECT *
FROM(
VALUES
(
1,
"string",
(int?) 2,
DateTime.Parse("2015-12-15")
)) AS T([i], [string], [nullable int], [^] );// BUGBUG [个]);

// Still to do, user-defined types
@complex_rs =
SELECT * FROM (VALUES( (new SQL.ARRAY<int>{1,2})
, (new SQL.MAP<string, int?>{{"key", 42}})
, (new SQL.MAP<string, SQL.ARRAY<string>>{{"key", new SQL.ARRAY<string>{"a","b"}}})
, new SQL.ARRAY<SQL.MAP<int,int?>>{new SQL.MAP<int,int?>{{1,2},{2,3}}}
)
) AS T(a_int, m_s_i, m_s_as, a_mii);

OUTPUT @simple_rs
TO "/output/simple.txt"
USING new HeaderOutputter.HeaderOutputter(quoting:false, with_types:true);

OUTPUT @quoted_rs
TO "/output/quoted.txt"
USING new HeaderOutputter.HeaderOutputter(quoting:true, with_types:true, encoding:Encoding.Unicode);

OUTPUT @quoted_rs
TO "/output/quoted_notypes_factory.txt"
USING HeaderOutputter.Factory.Columns(quoting:true, encoding:Encoding.Unicode);

OUTPUT @quoted_rs
TO "/output/quoted_types_factory.txt"
USING HeaderOutputter.Factory.ColumnsAndTypes(quoting:true, encoding:Encoding.Unicode);

OUTPUT @complex_rs
TO "/output/complex.txt"
USING new HeaderOutputter.HeaderOutputter(quoting:true, with_types:true);

Loading

0 comments on commit 9ad8a9d

Please sign in to comment.