-
Notifications
You must be signed in to change notification settings - Fork 13
Open
Description
The type analyzer can sometimes drop columns that are present in the input from its analysis. For instance:
// Reproduction script for the reported behavior: computeColMeta returns
// metadata for fewer columns than the input rows contain.
const fullMoviesDataset = require('vega-datasets/data/movies.json');
const {Analyzer} = require('type-analyzer');
const {computeColMeta} = Analyzer;
// Six-row sample with 16 keys per row. Note that 'US DVD Sales',
// 'Running Time min', and 'Director' are null in every row of this sample;
// every other column has at least one non-null value.
const moviesDataSample = [
{
Title: 'The Land Girls',
'US Gross': 146083,
'Worldwide Gross': 146083,
'US DVD Sales': null,
'Production Budget': 8000000,
'Release Date': 'Jun 12 1998',
'MPAA Rating': 'R',
'Running Time min': null,
Distributor: 'Gramercy',
Source: null,
'Major Genre': null,
'Creative Type': null,
Director: null,
'Rotten Tomatoes Rating': null,
'IMDB Rating': 6.1,
'IMDB Votes': 1071,
},
{
Title: 'First Love, Last Rites',
'US Gross': 10876,
'Worldwide Gross': 10876,
'US DVD Sales': null,
'Production Budget': 300000,
'Release Date': 'Aug 07 1998',
'MPAA Rating': 'R',
'Running Time min': null,
Distributor: 'Strand',
Source: null,
'Major Genre': 'Drama',
'Creative Type': null,
Director: null,
'Rotten Tomatoes Rating': null,
'IMDB Rating': 6.9,
'IMDB Votes': 207,
},
{
Title: 'I Married a Strange Person',
'US Gross': 203134,
'Worldwide Gross': 203134,
'US DVD Sales': null,
'Production Budget': 250000,
'Release Date': 'Aug 28 1998',
'MPAA Rating': null,
'Running Time min': null,
Distributor: 'Lionsgate',
Source: null,
'Major Genre': 'Comedy',
'Creative Type': null,
Director: null,
'Rotten Tomatoes Rating': null,
'IMDB Rating': 6.8,
'IMDB Votes': 865,
},
{
Title: "Let's Talk About Sex",
'US Gross': 373615,
'Worldwide Gross': 373615,
'US DVD Sales': null,
'Production Budget': 300000,
'Release Date': 'Sep 11 1998',
'MPAA Rating': null,
'Running Time min': null,
Distributor: 'Fine Line',
Source: null,
'Major Genre': 'Comedy',
'Creative Type': null,
Director: null,
'Rotten Tomatoes Rating': 13,
'IMDB Rating': null,
'IMDB Votes': null,
},
{
Title: 'Slam',
'US Gross': 1009819,
'Worldwide Gross': 1087521,
'US DVD Sales': null,
'Production Budget': 1000000,
'Release Date': 'Oct 09 1998',
'MPAA Rating': 'R',
'Running Time min': null,
Distributor: 'Trimark',
Source: 'Original Screenplay',
'Major Genre': 'Drama',
'Creative Type': 'Contemporary Fiction',
Director: null,
'Rotten Tomatoes Rating': 62,
'IMDB Rating': 3.4,
'IMDB Votes': 165,
},
{
Title: 'Mississippi Mermaid',
'US Gross': 24551,
'Worldwide Gross': 2624551,
'US DVD Sales': null,
'Production Budget': 1600000,
'Release Date': 'Jan 15 1999',
'MPAA Rating': null,
'Running Time min': null,
Distributor: 'MGM',
Source: null,
'Major Genre': null,
'Creative Type': null,
Director: null,
'Rotten Tomatoes Rating': null,
'IMDB Rating': null,
'IMDB Votes': null,
},
];
// Case 1 (six-row sample): produces incorrect output. 13 of the 16 input keys
// are reported; the three missing keys are exactly the all-null columns.
console.log(computeColMeta(moviesDataSample).map(x => x.key));
// ['Title','US Gross','Worldwide Gross','Production Budget','Release Date','MPAA Rating','Distributor','Source','Major Genre','Creative Type','Rotten Tomatoes Rating','IMDB Rating','IMDB Votes']
// missing 'US DVD Sales', 'Running Time min', and 'Director'
// Case 2 (full vega-datasets movies file): produces different incorrect output!
// Here the columns dropped in case 1 are present, but 'Title' is absent instead.
console.log(computeColMeta(fullMoviesDataset).map(x => x.key));
// [ 'US Gross', 'Worldwide Gross', 'US DVD Sales', 'Production Budget', 'Release Date', 'MPAA Rating', 'Running Time min', 'Distributor', 'Source', 'Major Genre', 'Creative Type', 'Director', 'Rotten Tomatoes Rating', 'IMDB Rating', 'IMDB Votes'];
// missing 'Title'!
At first glance, this seems to be tied to values being null (in which case the fix would simply require paging forward in the dataset until a non-null value is found). A reasonable default behavior, if TypeAnalyzer can't figure out what to do, would be to return an innocuous blank type such as DIMENSION/String.
Metadata
Metadata
Assignees
Labels
No labels