From 67eaf66b3963c36154ca5c7e70d191679e8b36e7 Mon Sep 17 00:00:00 2001 From: Aaron Lee Date: Thu, 3 Jun 2021 22:24:24 +0100 Subject: [PATCH 1/8] WIP: Incomplete SegmentIndices --- .../scala/com/spotify/featran/CanBuild.scala | 8 +++ .../featran/CrossingFeatureBuilder.scala | 14 ++++ .../com/spotify/featran/FeatureBuilder.scala | 10 +++ .../com/spotify/featran/FlatConverter.scala | 2 + .../com/spotify/featran/FlatExtractor.scala | 2 + .../com/spotify/featran/FloatingPoint.scala | 5 +- .../com/spotify/featran/json/Implicits.scala | 6 ++ .../featran/transformers/Normalizer.scala | 2 +- .../featran/transformers/SegmentIndices.scala | 71 +++++++++++++++++++ .../transformers/SegmentIndicesSpec.scala | 41 +++++++++++ .../spotify/featran/tensorflow/package.scala | 10 +++ 11 files changed, 169 insertions(+), 2 deletions(-) create mode 100644 core/src/main/scala/com/spotify/featran/transformers/SegmentIndices.scala create mode 100644 core/src/test/scala/com/spotify/featran/transformers/SegmentIndicesSpec.scala diff --git a/core/src/main/scala/com/spotify/featran/CanBuild.scala b/core/src/main/scala/com/spotify/featran/CanBuild.scala index edfa06ba..ae582799 100644 --- a/core/src/main/scala/com/spotify/featran/CanBuild.scala +++ b/core/src/main/scala/com/spotify/featran/CanBuild.scala @@ -62,4 +62,12 @@ object CanBuild { implicit def arrayCB[T: ClassTag]: CanBuild[T, Array] = new CanBuild[T, Array] { override def apply(): mutable.Builder[T, Array[T]] = Array.newBuilder[T] } + + implicit def IntSeqCB: CanBuild[Int, Seq] = new CanBuild[Int, Seq] { + override def apply(): mutable.Builder[Int, Seq[Int]] = Seq.newBuilder + } + + implicit def IntArrayCB: CanBuild[Int, Array] = new CanBuild[Int, Array] { + override def apply(): mutable.Builder[Int, Array[Int]] = Array.newBuilder[Int] + } } diff --git a/core/src/main/scala/com/spotify/featran/CrossingFeatureBuilder.scala b/core/src/main/scala/com/spotify/featran/CrossingFeatureBuilder.scala index dea8383c..b92fbf9a 100644 --- 
a/core/src/main/scala/com/spotify/featran/CrossingFeatureBuilder.scala +++ b/core/src/main/scala/com/spotify/featran/CrossingFeatureBuilder.scala @@ -117,6 +117,20 @@ private class CrossingFeatureBuilder[F] private ( } fb.add(names, values) } + + override def addInts[M[_]](names: Iterable[String], values: M[Int])(implicit + ev: M[Int] => Seq[Int]): Unit = { + if (xEnabled) { + val i = names.iterator + val j = values.iterator + while (i.hasNext && j.hasNext) { + xQueue.enqueue(CrossValue(i.next(), xOffset, j.next())) + xOffset += 1 + } + } + fb.addInts(names, values) + } + override def skip(): Unit = { xOffset += 1 fb.skip() diff --git a/core/src/main/scala/com/spotify/featran/FeatureBuilder.scala b/core/src/main/scala/com/spotify/featran/FeatureBuilder.scala index 4200b77a..0bf5b223 100644 --- a/core/src/main/scala/com/spotify/featran/FeatureBuilder.scala +++ b/core/src/main/scala/com/spotify/featran/FeatureBuilder.scala @@ -109,6 +109,16 @@ object FeatureRejection { } } + def addInts[M[_]](names: Iterable[String], values: M[Int])(implicit + ev: M[Int] => Seq[Int] + ): Unit = { + val i = names.iterator + val j = values.iterator + while (i.hasNext && j.hasNext) { + add(i.next(), j.next()) + } + } + /** * Skip multiple feature values. The total number of values added and skipped should equal to * dimension in [[init]]. 
diff --git a/core/src/main/scala/com/spotify/featran/FlatConverter.scala b/core/src/main/scala/com/spotify/featran/FlatConverter.scala index da4afd5c..3705cec4 100644 --- a/core/src/main/scala/com/spotify/featran/FlatConverter.scala +++ b/core/src/main/scala/com/spotify/featran/FlatConverter.scala @@ -42,6 +42,8 @@ import scala.annotation.implicitNotFound def writeStrings(name: String): Option[Seq[String]] => IF + def writeIntArray(name: String): Option[Array[Int]] => IF + def writer: Seq[IF] => T } diff --git a/core/src/main/scala/com/spotify/featran/FlatExtractor.scala b/core/src/main/scala/com/spotify/featran/FlatExtractor.scala index 8cbaa109..9c2e355c 100644 --- a/core/src/main/scala/com/spotify/featran/FlatExtractor.scala +++ b/core/src/main/scala/com/spotify/featran/FlatExtractor.scala @@ -43,6 +43,8 @@ import scala.annotation.implicitNotFound def readString(name: String): T => Option[String] def readStrings(name: String): T => Option[Seq[String]] + + def readIntArray(name: String): T => Option[Array[Int]] } object FlatReader { diff --git a/core/src/main/scala/com/spotify/featran/FloatingPoint.scala b/core/src/main/scala/com/spotify/featran/FloatingPoint.scala index b1c4912d..c58a5aa3 100644 --- a/core/src/main/scala/com/spotify/featran/FloatingPoint.scala +++ b/core/src/main/scala/com/spotify/featran/FloatingPoint.scala @@ -23,7 +23,7 @@ import scala.annotation.implicitNotFound /** Type class for floating point primitives. 
*/ @implicitNotFound("Could not find an instance of FloatingPoint for ${T}") -@typeclass trait FloatingPoint[@specialized(Float, Double) T] extends Serializable { +@typeclass trait FloatingPoint[@specialized(Float, Double, Int) T] extends Serializable { def fromDouble(x: Double): T } @@ -34,6 +34,9 @@ object FloatingPoint { implicit val doubleFP: FloatingPoint[Double] = new FloatingPoint[Double] { override def fromDouble(x: Double): Double = x } + implicit val intFP: FloatingPoint[Int] = new FloatingPoint[Int] { + override def fromDouble(x: Double): Int = x.toInt + } /* ======================================================================== */ /* THE FOLLOWING CODE IS MANAGED BY SIMULACRUM; PLEASE DO NOT EDIT!!!! */ diff --git a/core/src/main/scala/com/spotify/featran/json/Implicits.scala b/core/src/main/scala/com/spotify/featran/json/Implicits.scala index 92264f88..73f1d0e9 100644 --- a/core/src/main/scala/com/spotify/featran/json/Implicits.scala +++ b/core/src/main/scala/com/spotify/featran/json/Implicits.scala @@ -110,6 +110,9 @@ private[featran] trait Implicits extends Serializable { override def readStrings(name: String): String => Option[Seq[String]] = toFeature[Seq[String]](name) + + override def readIntArray(name: String): String => Option[Array[Int]] = + toFeature[Array[Int]](name) } implicit val jsonFlatWriter: FlatWriter[String] = new FlatWriter[String] { @@ -138,6 +141,9 @@ private[featran] trait Implicits extends Serializable { override def writeStrings(name: String): Option[Seq[String]] => (String, Option[Json]) = (v: Option[Seq[String]]) => (name, v.map(_.asJson)) + override def writeIntArray(name: String): Option[Array[Int]] => (String, Option[Json]) = + (v: Option[Array[Int]]) => (name, v.map(_.asJson)) + override def writer: Seq[(String, Option[Json])] => String = _.asJson.noSpaces } diff --git a/core/src/main/scala/com/spotify/featran/transformers/Normalizer.scala b/core/src/main/scala/com/spotify/featran/transformers/Normalizer.scala index 
016942b0..a21efff9 100644 --- a/core/src/main/scala/com/spotify/featran/transformers/Normalizer.scala +++ b/core/src/main/scala/com/spotify/featran/transformers/Normalizer.scala @@ -45,7 +45,7 @@ object Normalizer extends SettingsBuilder { new Normalizer(name, p, expectedLength) /** - * Create a new [[OneHotEncoder]] from a settings object + * Create a new [[Normalizer]] from a settings object * @param setting Settings object */ def fromSettings(setting: Settings): Transformer[Array[Double], Int, Int] = { diff --git a/core/src/main/scala/com/spotify/featran/transformers/SegmentIndices.scala b/core/src/main/scala/com/spotify/featran/transformers/SegmentIndices.scala new file mode 100644 index 00000000..78b52bab --- /dev/null +++ b/core/src/main/scala/com/spotify/featran/transformers/SegmentIndices.scala @@ -0,0 +1,71 @@ +package com.spotify.featran.transformers + +import com.spotify.featran.{FeatureBuilder, FeatureRejection, FlatReader, FlatWriter} +import com.twitter.algebird.Aggregator + +object SegmentIndices extends SettingsBuilder { + + /** + * Create a new [[SegmentIndices]] instance. 
+ * @param expectedLength expected length of the input vectors, or 0 to infer from data + */ + def apply( + name: String, + expectedLength: Int = 0 + ): Transformer[Array[Int], Int, Int] = + new SegmentIndices(name, expectedLength) + + /** + * Create a new [[SegmentIndices]] from a settings object + * @param setting Settings object + */ + def fromSettings(setting: Settings): Transformer[Array[Int], Int, Int] = { + val expectedLength = setting.params("expectedLength").toInt + SegmentIndices(setting.name, expectedLength) + } +} + +private[featran] class SegmentIndices(name: String, expectedLength: Int = 0) + extends Transformer[Array[Int], Int, Int](name) { + + override val aggregator: Aggregator[Array[Int], Int, Int] = + Aggregators.seqLength(expectedLength) + override def featureDimension(c: Int): Int = c + override def featureNames(c: Int): Seq[String] = names(c) + + override def buildFeatures(a: Option[Array[Int]], c: Int, fb: FeatureBuilder[_]): Unit = a match { + //TODO: Require increasing input (non-strict monotonic) + case Some(x) if (x.length != c) => + fb.skip(c) + fb.reject(this, FeatureRejection.WrongDimension(c, x.length)) + case Some(x) => { + val copyOfX = x.clone() + + var tmp: Int = 0 + copyOfX(0) = 0 //TODO: Set first element to 0, Guard against head being empty or non-zero + + for (index <- 1 until copyOfX.length) { //Skip 0th! 
+ val inputValue = copyOfX(index) + + if (inputValue == tmp) + copyOfX(index) = copyOfX(index - 1) + 1 + else + copyOfX(index) = 0 + tmp = inputValue + } + + fb.addInts(names = names(c), values = copyOfX) + } + case None => fb.skip(c) + } + + override def encodeAggregator(c: Int): String = c.toString + override def decodeAggregator(s: String): Int = s.toInt + override def params: Map[String, String] = + Map("expectedLength" -> expectedLength.toString) + + override def flatRead[T: FlatReader]: T => Option[Any] = FlatReader[T].readIntArray(name) + + override def flatWriter[T](implicit fw: FlatWriter[T]): Option[Array[Int]] => fw.IF = fw.writeIntArray(name) + +} diff --git a/core/src/test/scala/com/spotify/featran/transformers/SegmentIndicesSpec.scala b/core/src/test/scala/com/spotify/featran/transformers/SegmentIndicesSpec.scala new file mode 100644 index 00000000..ddd0f8eb --- /dev/null +++ b/core/src/test/scala/com/spotify/featran/transformers/SegmentIndicesSpec.scala @@ -0,0 +1,41 @@ +package com.spotify.featran.transformers + +import com.spotify.featran.FeatureSpec +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers + +object SegmentIndicesSpec extends AnyFlatSpec with Matchers { + + def main(args: Array[String]): Unit = { + def randomMonotonicIncreasingArray(): Array[Int] = { + val emptyArray = Array.fill(10)(0) + for (index <- 1 until emptyArray.length) { + if (math.random() > 0.5) { + emptyArray(index) = emptyArray(index - 1) + 1 + } else + emptyArray(index) = emptyArray(index - 1) + } + emptyArray + } + + val hundredRandomArrays = (1 to 100).toList.map(_ => randomMonotonicIncreasingArray()) + + val segmentedIndices = FeatureSpec + .of[Array[Int]] + .required(identity)(SegmentIndices("segmented")) + + val expected = hundredRandomArrays.map { testCase => + testCase.groupBy(identity).toSeq.sortBy(_._1).flatMap { case (_, sameNumber) => + sameNumber.indices.toList } + } + + val result = 
segmentedIndices.extract(hundredRandomArrays).featureValues[Seq[Int]] + + + result should equal(expected) + } +// property("default") = Prop.forAll(list[Array[Int]].arbitrary) { +// (xs) => //Need test input to be non-strict monotonic increasing +// +// } +} diff --git a/tensorflow/src/main/scala/com/spotify/featran/tensorflow/package.scala b/tensorflow/src/main/scala/com/spotify/featran/tensorflow/package.scala index dcae37dd..ca4fd122 100644 --- a/tensorflow/src/main/scala/com/spotify/featran/tensorflow/package.scala +++ b/tensorflow/src/main/scala/com/spotify/featran/tensorflow/package.scala @@ -104,6 +104,9 @@ package object tensorflow { def fromStrings(xs: Seq[String]): tf.Feature.Builder = fromByteStrings(xs.map(ByteString.copyFromUtf8)) + def toInts(f: tf.Feature): Array[Int] = toFloats(f).map(_.toInt).toArray + + def fromInts(xs: Array[Int]): tf.Feature.Builder = fromFloats(xs.map(_.toFloat)) } /** [[FeatureBuilder]] for output as TensorFlow `Example` type. */ @@ -147,6 +150,8 @@ package object tensorflow { def readStrings(name: String): Example => Option[Seq[String]] = (ex: Example) => toFeature(name, ex).map(v => toStrings(v)) + + def readIntArray(name: String): Example => Option[Array[Int]] = (ex: Example) => toFeature(name, ex).map(v => toInts(v)) } implicit val exampleFlatWriter: FlatWriter[Example] = new FlatWriter[tf.Example] { @@ -196,6 +201,11 @@ package object tensorflow { v.toList.flatMap(values => List(NamedTFFeature(name, fromStrings(values).build()))) } + override def writeIntArray(name: String): Option[Array[Int]] => List[NamedTFFeature] = + (v: Option[Array[Int]]) => { + v.toList.flatMap(values => List(NamedTFFeature(name, fromInts(values).build()))) + } + override def writer: Seq[List[NamedTFFeature]] => Example = (fns: Seq[List[NamedTFFeature]]) => { val builder = Features.newBuilder() From 2acac498eadb22546f30c7d5811ca40c63bc1d63 Mon Sep 17 00:00:00 2001 From: Aaron Lee Date: Sat, 5 Jun 2021 20:11:47 +0100 Subject: [PATCH 2/8] Replace 
mutable code with fold --- .../featran/transformers/SegmentIndices.scala | 22 +++++-------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/core/src/main/scala/com/spotify/featran/transformers/SegmentIndices.scala b/core/src/main/scala/com/spotify/featran/transformers/SegmentIndices.scala index 78b52bab..7be350b4 100644 --- a/core/src/main/scala/com/spotify/featran/transformers/SegmentIndices.scala +++ b/core/src/main/scala/com/spotify/featran/transformers/SegmentIndices.scala @@ -38,24 +38,14 @@ private[featran] class SegmentIndices(name: String, expectedLength: Int = 0) case Some(x) if (x.length != c) => fb.skip(c) fb.reject(this, FeatureRejection.WrongDimension(c, x.length)) - case Some(x) => { - val copyOfX = x.clone() - - var tmp: Int = 0 - copyOfX(0) = 0 //TODO: Set first element to 0, Guard against head being empty or non-zero - - for (index <- 1 until copyOfX.length) { //Skip 0th! - val inputValue = copyOfX(index) - - if (inputValue == tmp) - copyOfX(index) = copyOfX(index - 1) + 1 - else - copyOfX(index) = 0 - tmp = inputValue + case Some(x) => + val (segmentedIndices, _) = x.zipWithIndex.foldLeft((Array.empty[Int], 0)){ + case (_, (xElement, 0)) => (Array(0), xElement) + case ((segments, previousXElement), (xElement, index)) if (xElement == previousXElement) => (segments ++ Array(segments(index - 1) + 1), xElement) + case ((segments, _), (xElement, _)) => (segments ++ Array(0), xElement) } - fb.addInts(names = names(c), values = copyOfX) - } + fb.addInts(names = names(c), values = segmentedIndices) case None => fb.skip(c) } From 2d4533be105d3ea5144954f377af4d22c5211714 Mon Sep 17 00:00:00 2001 From: Aaron Lee Date: Sat, 19 Jun 2021 16:42:46 +0100 Subject: [PATCH 3/8] Add check that input is valid, that is monotonic and increasing --- .../main/scala/com/spotify/featran/FeatureBuilder.scala | 1 + .../com/spotify/featran/transformers/SegmentIndices.scala | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git 
a/core/src/main/scala/com/spotify/featran/FeatureBuilder.scala b/core/src/main/scala/com/spotify/featran/FeatureBuilder.scala index 0bf5b223..b0c5afa5 100644 --- a/core/src/main/scala/com/spotify/featran/FeatureBuilder.scala +++ b/core/src/main/scala/com/spotify/featran/FeatureBuilder.scala @@ -33,6 +33,7 @@ object FeatureRejection { case class Unseen(labels: Set[String]) extends FeatureRejection case class WrongDimension(expected: Int, actual: Int) extends FeatureRejection case class Outlier(actual: Double) extends FeatureRejection + case class InvalidInput(reason: String) extends FeatureRejection case object Collision extends FeatureRejection } diff --git a/core/src/main/scala/com/spotify/featran/transformers/SegmentIndices.scala b/core/src/main/scala/com/spotify/featran/transformers/SegmentIndices.scala index 7be350b4..5deb5265 100644 --- a/core/src/main/scala/com/spotify/featran/transformers/SegmentIndices.scala +++ b/core/src/main/scala/com/spotify/featran/transformers/SegmentIndices.scala @@ -34,10 +34,12 @@ private[featran] class SegmentIndices(name: String, expectedLength: Int = 0) override def featureNames(c: Int): Seq[String] = names(c) override def buildFeatures(a: Option[Array[Int]], c: Int, fb: FeatureBuilder[_]): Unit = a match { - //TODO: Require increasing input (non-strict monotonic) case Some(x) if (x.length != c) => fb.skip(c) fb.reject(this, FeatureRejection.WrongDimension(c, x.length)) + case Some(x) if (!isMonotonic(x)) => + fb.skip(c) + fb.reject(this, FeatureRejection.InvalidInput("Require an increasing sequence of numbers to use SegementIndices.")) case Some(x) => val (segmentedIndices, _) = x.zipWithIndex.foldLeft((Array.empty[Int], 0)){ case (_, (xElement, 0)) => (Array(0), xElement) @@ -58,4 +60,7 @@ private[featran] class SegmentIndices(name: String, expectedLength: Int = 0) override def flatWriter[T](implicit fw: FlatWriter[T]): Option[Array[Int]] => fw.IF = fw.writeIntArray(name) + private def isMonotonic(arr:Array[Int]): Boolean = + 
(arr, arr.drop(1)).zipped.forall (_ <= _) + } From 1510713dc3ead6e52222ddd9c7b32b1110b9f720 Mon Sep 17 00:00:00 2001 From: Aaron Lee Date: Sat, 19 Jun 2021 16:50:31 +0100 Subject: [PATCH 4/8] Make Int conversion consistent with other types in tensorflow --- .../main/scala/com/spotify/featran/tensorflow/package.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/src/main/scala/com/spotify/featran/tensorflow/package.scala b/tensorflow/src/main/scala/com/spotify/featran/tensorflow/package.scala index ca4fd122..60922d2d 100644 --- a/tensorflow/src/main/scala/com/spotify/featran/tensorflow/package.scala +++ b/tensorflow/src/main/scala/com/spotify/featran/tensorflow/package.scala @@ -104,9 +104,9 @@ package object tensorflow { def fromStrings(xs: Seq[String]): tf.Feature.Builder = fromByteStrings(xs.map(ByteString.copyFromUtf8)) - def toInts(f: tf.Feature): Array[Int] = toFloats(f).map(_.toInt).toArray + def toInts(f: tf.Feature): Seq[Int] = toFloats(f).map(_.toInt) - def fromInts(xs: Array[Int]): tf.Feature.Builder = fromFloats(xs.map(_.toFloat)) + def fromInts(xs: Seq[Int]): tf.Feature.Builder = fromFloats(xs.map(_.toFloat)) } /** [[FeatureBuilder]] for output as TensorFlow `Example` type. 
*/ @@ -151,7 +151,7 @@ package object tensorflow { def readStrings(name: String): Example => Option[Seq[String]] = (ex: Example) => toFeature(name, ex).map(v => toStrings(v)) - def readIntArray(name: String): Example => Option[Array[Int]] = (ex: Example) => toFeature(name, ex).map(v => toInts(v)) + def readIntArray(name: String): Example => Option[Array[Int]] = (ex: Example) => toFeature(name, ex).map(v => toInts(v).toArray) } implicit val exampleFlatWriter: FlatWriter[Example] = new FlatWriter[tf.Example] { From 8cf2fc5a59859b4cb92d8af0626ff8d1e751bc59 Mon Sep 17 00:00:00 2001 From: Aaron Lee Date: Sat, 19 Jun 2021 16:59:50 +0100 Subject: [PATCH 5/8] Add example usage of SegmentIndices in Examples --- examples/src/main/scala/Examples.scala | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/examples/src/main/scala/Examples.scala b/examples/src/main/scala/Examples.scala index 57589e49..5acb9546 100644 --- a/examples/src/main/scala/Examples.scala +++ b/examples/src/main/scala/Examples.scala @@ -179,6 +179,17 @@ object Examples { // Extract from a single record recordExtractor.featureResult(recordGen.sample.get) + // This example applies the SegmentIndices transformer, which requires an increasing array of integers. 
+ val segmentIndicesSpec = FeatureSpec + .of[Array[Int]] + .required(identity)(SegmentIndices("segmented")) + + val f3: FeatureExtractor[List, Array[Int]] = segmentIndicesSpec.extract(List(Array(0,0,1,1,2,2))) + + // Extract feature names and values as `Array[Int]` similar to other examples + println(f3.featureNames.head) + f3.featureValues[Array[Int]].foreach(println) + // # Extraction with Scio // Create input `SCollection[Record]` From f7c5b6107f465b557e9f81954146726c2bae18b2 Mon Sep 17 00:00:00 2001 From: Aaron Lee Date: Sat, 19 Jun 2021 21:22:24 +0100 Subject: [PATCH 6/8] Refactor SegmentIndicesSpec to use Scalacheck prop --- .../transformers/SegmentIndicesSpec.scala | 27 ++++++++----------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/core/src/test/scala/com/spotify/featran/transformers/SegmentIndicesSpec.scala b/core/src/test/scala/com/spotify/featran/transformers/SegmentIndicesSpec.scala index ddd0f8eb..2bfd5942 100644 --- a/core/src/test/scala/com/spotify/featran/transformers/SegmentIndicesSpec.scala +++ b/core/src/test/scala/com/spotify/featran/transformers/SegmentIndicesSpec.scala @@ -1,13 +1,11 @@ package com.spotify.featran.transformers import com.spotify.featran.FeatureSpec -import org.scalatest.flatspec.AnyFlatSpec -import org.scalatest.matchers.should.Matchers +import org.scalacheck.{Arbitrary, Prop} -object SegmentIndicesSpec extends AnyFlatSpec with Matchers { +object SegmentIndicesSpec extends TransformerProp("SegmentIndices") { - def main(args: Array[String]): Unit = { - def randomMonotonicIncreasingArray(): Array[Int] = { + implicit lazy val randomMonotonicIncreasingArray: Arbitrary[Array[Int]] = Arbitrary { val emptyArray = Array.fill(10)(0) for (index <- 1 until emptyArray.length) { if (math.random() > 0.5) { @@ -16,26 +14,23 @@ object SegmentIndicesSpec extends AnyFlatSpec with Matchers { emptyArray(index) = emptyArray(index - 1) } emptyArray - } + } - val hundredRandomArrays = (1 to 100).toList.map(_ => 
randomMonotonicIncreasingArray()) + property("default") = Prop.forAll { (xs: List[Array[Int]]) => - val segmentedIndices = FeatureSpec + val segmentIndicesSpec = FeatureSpec .of[Array[Int]] .required(identity)(SegmentIndices("segmented")) - val expected = hundredRandomArrays.map { testCase => + val expected = xs.map { testCase => testCase.groupBy(identity).toSeq.sortBy(_._1).flatMap { case (_, sameNumber) => sameNumber.indices.toList } } - val result = segmentedIndices.extract(hundredRandomArrays).featureValues[Seq[Int]] - + val result = segmentIndicesSpec.extract(xs) + .featureValues[Array[Int]] + .map(_.toSeq) - result should equal(expected) + Prop.all(result == expected) } -// property("default") = Prop.forAll(list[Array[Int]].arbitrary) { -// (xs) => //Need test input to be non-strict monotonic increasing -// -// } } From e9ecf8c3772e9dbfb63db538e3361911af7bf3fe Mon Sep 17 00:00:00 2001 From: Aaron Lee Date: Sun, 20 Jun 2021 13:47:35 +0100 Subject: [PATCH 7/8] Change naming on increasing array generator --- .../featran/transformers/SegmentIndicesSpec.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/core/src/test/scala/com/spotify/featran/transformers/SegmentIndicesSpec.scala b/core/src/test/scala/com/spotify/featran/transformers/SegmentIndicesSpec.scala index 2bfd5942..d253bfd2 100644 --- a/core/src/test/scala/com/spotify/featran/transformers/SegmentIndicesSpec.scala +++ b/core/src/test/scala/com/spotify/featran/transformers/SegmentIndicesSpec.scala @@ -5,15 +5,15 @@ import org.scalacheck.{Arbitrary, Prop} object SegmentIndicesSpec extends TransformerProp("SegmentIndices") { - implicit lazy val randomMonotonicIncreasingArray: Arbitrary[Array[Int]] = Arbitrary { - val emptyArray = Array.fill(10)(0) - for (index <- 1 until emptyArray.length) { + implicit lazy val randomIncreasingArray: Arbitrary[Array[Int]] = Arbitrary { + val increasingArray = Array.fill(10)(0) + for (index <- 1 until increasingArray.length) { if 
(math.random() > 0.5) { - emptyArray(index) = emptyArray(index - 1) + 1 + increasingArray(index) = increasingArray(index - 1) + 1 } else - emptyArray(index) = emptyArray(index - 1) + increasingArray(index) = increasingArray(index - 1) } - emptyArray + increasingArray } property("default") = Prop.forAll { (xs: List[Array[Int]]) => From 66f4ac4f68c34c937a736a24f1cfc39c0ed749a1 Mon Sep 17 00:00:00 2001 From: Aaron Lee Date: Sun, 20 Jun 2021 15:57:36 +0100 Subject: [PATCH 8/8] Remove redundant CanBuild type class instance for Seq[Int] --- core/src/main/scala/com/spotify/featran/CanBuild.scala | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/core/src/main/scala/com/spotify/featran/CanBuild.scala b/core/src/main/scala/com/spotify/featran/CanBuild.scala index ae582799..b0d6aab8 100644 --- a/core/src/main/scala/com/spotify/featran/CanBuild.scala +++ b/core/src/main/scala/com/spotify/featran/CanBuild.scala @@ -63,11 +63,7 @@ object CanBuild { override def apply(): mutable.Builder[T, Array[T]] = Array.newBuilder[T] } - implicit def IntSeqCB: CanBuild[Int, Seq] = new CanBuild[Int, Seq] { - override def apply(): mutable.Builder[Int, Seq[Int]] = Seq.newBuilder - } - - implicit def IntArrayCB: CanBuild[Int, Array] = new CanBuild[Int, Array] { + implicit def intArrayCB: CanBuild[Int, Array] = new CanBuild[Int, Array] { override def apply(): mutable.Builder[Int, Array[Int]] = Array.newBuilder[Int] } }